Message ID | 20180912003640.28316-5-joe@wand.net.nz |
---|---|
State | Changes Requested, archived |
Delegated to: | BPF Maintainers |
Headers | show |
Series | Add socket lookup support | expand |
On Tue, Sep 11, 2018 at 05:36:33PM -0700, Joe Stringer wrote: > Teach the verifier a little bit about a new type of pointer, a > PTR_TO_SOCKET. This pointer type is accessed from BPF through the > 'struct bpf_sock' structure. > > Signed-off-by: Joe Stringer <joe@wand.net.nz> > --- > include/linux/bpf.h | 17 +++++ > include/linux/bpf_verifier.h | 2 + > kernel/bpf/verifier.c | 125 ++++++++++++++++++++++++++++++----- > net/core/filter.c | 30 +++++---- > 4 files changed, 147 insertions(+), 27 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 523481a3471b..6ec93f3d66dd 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -154,6 +154,7 @@ enum bpf_arg_type { > > ARG_PTR_TO_CTX, /* pointer to context */ > ARG_ANYTHING, /* any (initialized) argument is ok */ > + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ > }; > > /* type of values returned from helper functions */ > @@ -162,6 +163,7 @@ enum bpf_return_type { > RET_VOID, /* function doesn't return anything */ > RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ > RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ > + RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ > }; > > /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs > @@ -212,6 +214,8 @@ enum bpf_reg_type { > PTR_TO_PACKET_META, /* skb->data - meta_len */ > PTR_TO_PACKET, /* reg points to skb->data */ > PTR_TO_PACKET_END, /* skb->data + headlen */ > + PTR_TO_SOCKET, /* reg points to struct bpf_sock */ > + PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ > }; > > /* The information passed from prog-specific *_is_valid_access > @@ -334,6 +338,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void); > > typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, > unsigned long off, unsigned long len); > +typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, > + const struct bpf_insn *src, > + struct bpf_insn *dst, > + struct bpf_prog *prog, > + u32 *target_size); > > u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, > void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); > @@ -827,4 +836,12 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto; > void bpf_user_rnd_init_once(void); > u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); > > +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, > + struct bpf_insn_access_aux *info); > +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, > + const struct bpf_insn *si, > + struct bpf_insn *insn_buf, > + struct bpf_prog *prog, > + u32 *target_size); > + > #endif /* _LINUX_BPF_H */ > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h > index af262b97f586..23a2b17bfd75 100644 > --- a/include/linux/bpf_verifier.h > +++ b/include/linux/bpf_verifier.h > @@ -58,6 +58,8 @@ struct bpf_reg_state { > * offset, so they can share range knowledge. > * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we > * came from, when one is tested for != NULL. > + * For PTR_TO_SOCKET this is used to share which pointers retain the > + * same reference to the socket, to determine proper reference freeing. > */ > u32 id; > /* For scalar types (SCALAR_VALUE), this represents our knowledge of > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index f2357c8c90de..111e031cf65d 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -80,8 +80,8 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { > * (like pointer plus pointer becomes SCALAR_VALUE type) > * > * When verifier sees load or store instructions the type of base register > - * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer > - * types recognized by check_mem_access() function. > + * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are > + * four pointer types recognized by check_mem_access() function. > * > * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value' > * and the range of [ptr, ptr + map's value_size) is accessible. > @@ -266,6 +266,8 @@ static const char * const reg_type_str[] = { > [PTR_TO_PACKET] = "pkt", > [PTR_TO_PACKET_META] = "pkt_meta", > [PTR_TO_PACKET_END] = "pkt_end", > + [PTR_TO_SOCKET] = "sock", > + [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", > }; > > static char slot_type_char[] = { > @@ -971,6 +973,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) > case PTR_TO_PACKET_META: > case PTR_TO_PACKET_END: > case CONST_PTR_TO_MAP: > + case PTR_TO_SOCKET: > + case PTR_TO_SOCKET_OR_NULL: > return true; > default: > return false; > @@ -1326,6 +1330,28 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, > return -EACCES; > } > > +static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, > + int size, enum bpf_access_type t) > +{ > + struct bpf_reg_state *regs = cur_regs(env); > + struct bpf_reg_state *reg = ®s[regno]; > + struct bpf_insn_access_aux info; > + > + if (reg->smin_value < 0) { > + verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", > + regno); > + return -EACCES; > + } > + > + if (!bpf_sock_is_valid_access(off, size, t, &info)) { > + verbose(env, "invalid bpf_sock_ops access off=%d size=%d\n", > + off, size); > + return -EACCES; > + } > + > + return 0; > +} > + > static bool __is_pointer_value(bool allow_ptr_leaks, > const struct bpf_reg_state *reg) > { > @@ -1441,6 +1467,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, > */ > strict = true; > break; > + case PTR_TO_SOCKET: > + pointer_desc = "sock "; > + break; > default: > break; > } > @@ -1697,6 +1726,16 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn > err = check_packet_access(env, regno, off, size, false); > if (!err && t == BPF_READ && value_regno >= 0) > mark_reg_unknown(env, regs, value_regno); > + > + } else if (reg->type == PTR_TO_SOCKET) { > + if (t == BPF_WRITE) { > + verbose(env, "cannot write into socket\n"); > + return -EACCES; > + } > + err = check_sock_access(env, regno, off, size, t); > + if (!err && value_regno >= 0) > + mark_reg_unknown(env, regs, value_regno); > + > } else { > verbose(env, "R%d invalid mem access '%s'\n", regno, > reg_type_str[reg->type]); > @@ -1918,6 +1957,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, > err = check_ctx_reg(env, reg, regno); > if (err < 0) > return err; > + } else if (arg_type == ARG_PTR_TO_SOCKET) { > + expected_type = PTR_TO_SOCKET; > + if (type != expected_type) > + goto err_type; > } else if (arg_type_is_mem_ptr(arg_type)) { > expected_type = PTR_TO_STACK; > /* One exception here. In case function allows for NULL to be > @@ -2511,6 +2554,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn > } > regs[BPF_REG_0].map_ptr = meta.map_ptr; > regs[BPF_REG_0].id = ++env->id_gen; > + } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { > + mark_reg_known_zero(env, regs, BPF_REG_0); > + regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; > + regs[BPF_REG_0].id = ++env->id_gen; > } else { > verbose(env, "unknown return type %d of func %s#%d\n", > fn->ret_type, func_id_name(func_id), func_id); > @@ -2648,6 +2695,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, > return -EACCES; > case CONST_PTR_TO_MAP: > case PTR_TO_PACKET_END: > + case PTR_TO_SOCKET: > + case PTR_TO_SOCKET_OR_NULL: > verbose(env, "R%d pointer arithmetic on %s prohibited\n", > dst, reg_type_str[ptr_reg->type]); > return -EACCES; > @@ -3595,6 +3644,8 @@ static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id, > } else { > reg->type = PTR_TO_MAP_VALUE; > } > + } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { > + reg->type = PTR_TO_SOCKET; > } > /* We don't need id from this point onwards anymore, thus we > * should better reset it, so that state pruning has chances > @@ -4369,6 +4420,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, > case PTR_TO_CTX: > case CONST_PTR_TO_MAP: > case PTR_TO_PACKET_END: > + case PTR_TO_SOCKET: > + case PTR_TO_SOCKET_OR_NULL: > /* Only valid matches are exact, which memcmp() above > * would have accepted > */ > @@ -4646,6 +4699,37 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) > return 0; > } > > +/* Return true if it's OK to have the same insn return a different type. */ > +static bool reg_type_mismatch_ok(enum bpf_reg_type type) > +{ > + switch (type) { > + case PTR_TO_CTX: > + case PTR_TO_SOCKET: > + case PTR_TO_SOCKET_OR_NULL: > + return false; > + default: > + return true; > + } > +} > + > +/* If an instruction was previously used with particular pointer types, then we > + * need to be careful to avoid cases such as the below, where it may be ok > + * for one branch accessing the pointer, but not ok for the other branch: > + * > + * R1 = sock_ptr > + * goto X; > + * ... > + * R1 = some_other_valid_ptr; > + * goto X; > + * ... > + * R2 = *(u32 *)(R1 + 0); > + */ > +static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) > +{ > + return src != prev && (!reg_type_mismatch_ok(src) || > + !reg_type_mismatch_ok(prev)); > +} > + > static int do_check(struct bpf_verifier_env *env) > { > struct bpf_verifier_state *state; > @@ -4778,9 +4862,7 @@ static int do_check(struct bpf_verifier_env *env) > */ > *prev_src_type = src_reg_type; > > - } else if (src_reg_type != *prev_src_type && > - (src_reg_type == PTR_TO_CTX || > - *prev_src_type == PTR_TO_CTX)) { > + } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { > /* ABuser program is trying to use the same insn > * dst_reg = *(u32*) (src_reg + off) > * with different pointer types: > @@ -4826,8 +4908,8 @@ static int do_check(struct bpf_verifier_env *env) > if (*prev_dst_type == NOT_INIT) { > *prev_dst_type = dst_reg_type; > } else if (dst_reg_type != *prev_dst_type && > - (dst_reg_type == PTR_TO_CTX || > - *prev_dst_type == PTR_TO_CTX)) { > + (!reg_type_mismatch_ok(dst_reg_type) || > + !reg_type_mismatch_ok(*prev_dst_type))) { reg_type_mismatch() could have been used here as well ? > verbose(env, "same insn cannot be used with different pointers\n"); > return -EINVAL; > } > @@ -5244,10 +5326,14 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) > } > } > > -/* convert load instructions that access fields of 'struct __sk_buff' > - * into sequence of instructions that access fields of 'struct sk_buff' > +/* convert load instructions that access fields of a context type into a > + * sequence of instructions that access fields of the underlying structure: > + * struct __sk_buff -> struct sk_buff > + * struct bpf_sock_ops -> struct sock > */ > -static int convert_ctx_accesses(struct bpf_verifier_env *env) > +static int convert_ctx_accesses(struct bpf_verifier_env *env, > + bpf_convert_ctx_access_t convert_ctx_access, > + enum bpf_reg_type ctx_type) > { > const struct bpf_verifier_ops *ops = env->ops; > int i, cnt, size, ctx_field_size, delta = 0; > @@ -5274,12 +5360,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) > } > } > > - if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) > + if (!convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) > return 0; > > insn = env->prog->insnsi + delta; > > for (i = 0; i < insn_cnt; i++, insn++) { > + enum bpf_reg_type ptr_type; > + > if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || > insn->code == (BPF_LDX | BPF_MEM | BPF_H) || > insn->code == (BPF_LDX | BPF_MEM | BPF_W) || > @@ -5321,7 +5409,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) > continue; > } > > - if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) > + ptr_type = env->insn_aux_data[i + delta].ptr_type; > + if (ptr_type != ctx_type) > continue; > > ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; > @@ -5354,8 +5443,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) > } > > target_size = 0; > - cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog, > - &target_size); > + cnt = convert_ctx_access(type, insn, insn_buf, env->prog, > + &target_size); > if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || > (ctx_field_size && !target_size)) { > verbose(env, "bpf verifier is misconfigured\n"); > @@ -5899,7 +5988,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) > > if (ret == 0) > /* program is valid, convert *(u32*)(ctx + off) accesses */ > - ret = convert_ctx_accesses(env); > + ret = convert_ctx_accesses(env, env->ops->convert_ctx_access, > + PTR_TO_CTX); > + > + if (ret == 0) > + /* Convert *(u32*)(sock_ops + off) accesses */ > + ret = convert_ctx_accesses(env, bpf_sock_convert_ctx_access, > + PTR_TO_SOCKET); can it be done in single pass instead? env->insn_aux_data tells us what kind of conversion needs to happen. while progs are small, I guess, it's ok, but would like to see a follow up patch to this.
On Wed, 12 Sep 2018 at 15:50, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Tue, Sep 11, 2018 at 05:36:33PM -0700, Joe Stringer wrote: > > ... > > +static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) > > +{ > > + return src != prev && (!reg_type_mismatch_ok(src) || > > + !reg_type_mismatch_ok(prev)); > > +} > > + > > static int do_check(struct bpf_verifier_env *env) > > { > > struct bpf_verifier_state *state; > > @@ -4778,9 +4862,7 @@ static int do_check(struct bpf_verifier_env *env) > > */ > > *prev_src_type = src_reg_type; > > > > - } else if (src_reg_type != *prev_src_type && > > - (src_reg_type == PTR_TO_CTX || > > - *prev_src_type == PTR_TO_CTX)) { > > + } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { > > /* ABuser program is trying to use the same insn > > * dst_reg = *(u32*) (src_reg + off) > > * with different pointer types: > > @@ -4826,8 +4908,8 @@ static int do_check(struct bpf_verifier_env *env) > > if (*prev_dst_type == NOT_INIT) { > > *prev_dst_type = dst_reg_type; > > } else if (dst_reg_type != *prev_dst_type && > > - (dst_reg_type == PTR_TO_CTX || > > - *prev_dst_type == PTR_TO_CTX)) { > > + (!reg_type_mismatch_ok(dst_reg_type) || > > + !reg_type_mismatch_ok(*prev_dst_type))) { > > reg_type_mismatch() could have been used here as well ? Missed that before, will fix. > > verbose(env, "same insn cannot be used with different pointers\n"); > > return -EINVAL; > > } > > @@ -5244,10 +5326,14 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) > > } > > } > > > > -/* convert load instructions that access fields of 'struct __sk_buff' > > - * into sequence of instructions that access fields of 'struct sk_buff' > > +/* convert load instructions that access fields of a context type into a > > + * sequence of instructions that access fields of the underlying structure: > > + * struct __sk_buff -> struct sk_buff > > + * struct bpf_sock_ops -> struct sock > > */ > > -static int convert_ctx_accesses(struct bpf_verifier_env *env) > > +static int convert_ctx_accesses(struct bpf_verifier_env *env, > > + bpf_convert_ctx_access_t convert_ctx_access, > > + enum bpf_reg_type ctx_type) > > { > > const struct bpf_verifier_ops *ops = env->ops; > > int i, cnt, size, ctx_field_size, delta = 0; > > @@ -5274,12 +5360,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) > > } > > } > > > > - if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) > > + if (!convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) > > return 0; > > > > insn = env->prog->insnsi + delta; > > > > for (i = 0; i < insn_cnt; i++, insn++) { > > + enum bpf_reg_type ptr_type; > > + > > if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || > > insn->code == (BPF_LDX | BPF_MEM | BPF_H) || > > insn->code == (BPF_LDX | BPF_MEM | BPF_W) || > > @@ -5321,7 +5409,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) > > continue; > > } > > > > - if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) > > + ptr_type = env->insn_aux_data[i + delta].ptr_type; > > + if (ptr_type != ctx_type) > > continue; > > > > ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; > > @@ -5354,8 +5443,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) > > } > > > > target_size = 0; > > - cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog, > > - &target_size); > > + cnt = convert_ctx_access(type, insn, insn_buf, env->prog, > > + &target_size); > > if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || > > (ctx_field_size && !target_size)) { > > verbose(env, "bpf verifier is misconfigured\n"); > > @@ -5899,7 +5988,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) > > > > if (ret == 0) > > /* program is valid, convert *(u32*)(ctx + off) accesses */ > > - ret = convert_ctx_accesses(env); > > + ret = convert_ctx_accesses(env, env->ops->convert_ctx_access, > > + PTR_TO_CTX); > > + > > + if (ret == 0) > > + /* Convert *(u32*)(sock_ops + off) accesses */ > > + ret = convert_ctx_accesses(env, bpf_sock_convert_ctx_access, > > + PTR_TO_SOCKET); > > can it be done in single pass instead? > env->insn_aux_data tells us what kind of conversion needs to happen. > while progs are small, I guess, it's ok, but would like to see a follow up > patch to this. Yeah, good point - upon review it seems like this would be a simple change (incremental): diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index db05f78bfc6e..7fa1b695a2a1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5329,9 +5329,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) * struct __sk_buff -> struct sk_buff * struct bpf_sock_ops -> struct sock */ -static int convert_ctx_accesses(struct bpf_verifier_env *env, - bpf_convert_ctx_access_t convert_ctx_access, - enum bpf_reg_type ctx_type) +static int convert_ctx_accesses(struct bpf_verifier_env *env) { const struct bpf_verifier_ops *ops = env->ops; int i, cnt, size, ctx_field_size, delta = 0; @@ -5358,13 +5356,13 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env, } } - if (!convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) + if (bpf_prog_is_dev_bound(env->prog->aux)) return 0; insn = env->prog->insnsi + delta; for (i = 0; i < insn_cnt; i++, insn++) { - enum bpf_reg_type ptr_type; + bpf_convert_ctx_access_t convert_ctx_access; if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || @@ -5407,9 +5405,18 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env, continue; } - ptr_type = env->insn_aux_data[i + delta].ptr_type; - if (ptr_type != ctx_type) + switch (env->insn_aux_data[i + delta].ptr_type) { + case PTR_TO_CTX: + if (!ops->convert_ctx_access) + continue; + convert_ctx_access = ops->convert_ctx_access; + break; + case PTR_TO_SOCKET: + convert_ctx_access = bpf_sock_convert_ctx_access; + break; + default: continue; + } ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; size = BPF_LDST_BYTES(insn); @@ -5986,13 +5993,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ - ret = convert_ctx_accesses(env, env->ops->convert_ctx_access, - PTR_TO_CTX); - - if (ret == 0) - /* Convert *(u32*)(sock_ops + off) accesses */ - ret = convert_ctx_accesses(env, bpf_sock_convert_ctx_access, - PTR_TO_SOCKET); + ret = convert_ctx_accesses(env); if (ret == 0) ret = fixup_bpf_calls(env);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 523481a3471b..6ec93f3d66dd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -154,6 +154,7 @@ enum bpf_arg_type { ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ }; /* type of values returned from helper functions */ @@ -162,6 +163,7 @@ enum bpf_return_type { RET_VOID, /* function doesn't return anything */ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ + RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -212,6 +214,8 @@ enum bpf_reg_type { PTR_TO_PACKET_META, /* skb->data - meta_len */ PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ + PTR_TO_SOCKET, /* reg points to struct bpf_sock */ + PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -334,6 +338,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void); typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); +typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, + const struct bpf_insn *src, + struct bpf_insn *dst, + struct bpf_prog *prog, + u32 *target_size); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); @@ -827,4 +836,12 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto; void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info); +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index af262b97f586..23a2b17bfd75 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -58,6 +58,8 @@ struct bpf_reg_state { * offset, so they can share range knowledge. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we * came from, when one is tested for != NULL. + * For PTR_TO_SOCKET this is used to share which pointers retain the + * same reference to the socket, to determine proper reference freeing. */ u32 id; /* For scalar types (SCALAR_VALUE), this represents our knowledge of diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f2357c8c90de..111e031cf65d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -80,8 +80,8 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { * (like pointer plus pointer becomes SCALAR_VALUE type) * * When verifier sees load or store instructions the type of base register - * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer - * types recognized by check_mem_access() function. + * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are + * four pointer types recognized by check_mem_access() function. * * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value' * and the range of [ptr, ptr + map's value_size) is accessible. @@ -266,6 +266,8 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET] = "pkt", [PTR_TO_PACKET_META] = "pkt_meta", [PTR_TO_PACKET_END] = "pkt_end", + [PTR_TO_SOCKET] = "sock", + [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", }; static char slot_type_char[] = { @@ -971,6 +973,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_PACKET_META: case PTR_TO_PACKET_END: case CONST_PTR_TO_MAP: + case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: return true; default: return false; @@ -1326,6 +1330,28 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, return -EACCES; } +static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, + int size, enum bpf_access_type t) +{ + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = ®s[regno]; + struct bpf_insn_access_aux info; + + if (reg->smin_value < 0) { + verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + regno); + return -EACCES; + } + + if (!bpf_sock_is_valid_access(off, size, t, &info)) { + verbose(env, "invalid bpf_sock_ops access off=%d size=%d\n", + off, size); + return -EACCES; + } + + return 0; +} + static bool __is_pointer_value(bool allow_ptr_leaks, const struct bpf_reg_state *reg) { @@ -1441,6 +1467,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, */ strict = true; break; + case PTR_TO_SOCKET: + pointer_desc = "sock "; + break; default: break; } @@ -1697,6 +1726,16 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_packet_access(env, regno, off, size, false); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); + + } else if (reg->type == PTR_TO_SOCKET) { + if (t == BPF_WRITE) { + verbose(env, "cannot write into socket\n"); + return -EACCES; + } + err = check_sock_access(env, regno, off, size, t); + if (!err && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); + } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -1918,6 +1957,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, err = check_ctx_reg(env, reg, regno); if (err < 0) return err; + } else if (arg_type == ARG_PTR_TO_SOCKET) { + expected_type = PTR_TO_SOCKET; + if (type != expected_type) + goto err_type; } else if (arg_type_is_mem_ptr(arg_type)) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be @@ -2511,6 +2554,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].id = ++env->id_gen; + } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; + regs[BPF_REG_0].id = ++env->id_gen; } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -2648,6 +2695,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; case CONST_PTR_TO_MAP: case PTR_TO_PACKET_END: + case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; @@ -3595,6 +3644,8 @@ static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id, } else { reg->type = PTR_TO_MAP_VALUE; } + } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { + reg->type = PTR_TO_SOCKET; } /* We don't need id from this point onwards anymore, thus we * should better reset it, so that state pruning has chances @@ -4369,6 +4420,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case PTR_TO_CTX: case CONST_PTR_TO_MAP: case PTR_TO_PACKET_END: + case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: /* Only valid matches are exact, which memcmp() above * would have accepted */ @@ -4646,6 +4699,37 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; } +/* Return true if it's OK to have the same insn return a different type. */ +static bool reg_type_mismatch_ok(enum bpf_reg_type type) +{ + switch (type) { + case PTR_TO_CTX: + case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: + return false; + default: + return true; + } +} + +/* If an instruction was previously used with particular pointer types, then we + * need to be careful to avoid cases such as the below, where it may be ok + * for one branch accessing the pointer, but not ok for the other branch: + * + * R1 = sock_ptr + * goto X; + * ... + * R1 = some_other_valid_ptr; + * goto X; + * ... + * R2 = *(u32 *)(R1 + 0); + */ +static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) +{ + return src != prev && (!reg_type_mismatch_ok(src) || + !reg_type_mismatch_ok(prev)); +} + static int do_check(struct bpf_verifier_env *env) { struct bpf_verifier_state *state; @@ -4778,9 +4862,7 @@ static int do_check(struct bpf_verifier_env *env) */ *prev_src_type = src_reg_type; - } else if (src_reg_type != *prev_src_type && - (src_reg_type == PTR_TO_CTX || - *prev_src_type == PTR_TO_CTX)) { + } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { /* ABuser program is trying to use the same insn * dst_reg = *(u32*) (src_reg + off) * with different pointer types: @@ -4826,8 +4908,8 @@ static int do_check(struct bpf_verifier_env *env) if (*prev_dst_type == NOT_INIT) { *prev_dst_type = dst_reg_type; } else if (dst_reg_type != *prev_dst_type && - (dst_reg_type == PTR_TO_CTX || - *prev_dst_type == PTR_TO_CTX)) { + (!reg_type_mismatch_ok(dst_reg_type) || + !reg_type_mismatch_ok(*prev_dst_type))) { verbose(env, "same insn cannot be used with different pointers\n"); return -EINVAL; } @@ -5244,10 +5326,14 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) } } -/* convert load instructions that access fields of 'struct __sk_buff' - * into sequence of instructions that access fields of 'struct sk_buff' +/* convert load instructions that access fields of a context type into a + * sequence of instructions that access fields of the underlying structure: + * struct __sk_buff -> struct sk_buff + * struct bpf_sock_ops -> struct sock */ -static int convert_ctx_accesses(struct bpf_verifier_env *env) +static int convert_ctx_accesses(struct bpf_verifier_env *env, + bpf_convert_ctx_access_t convert_ctx_access, + enum bpf_reg_type ctx_type) { const struct bpf_verifier_ops *ops = env->ops; int i, cnt, size, ctx_field_size, delta = 0; @@ -5274,12 +5360,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } } - if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) + if (!convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) return 0; insn = env->prog->insnsi + delta; for (i = 0; i < insn_cnt; i++, insn++) { + enum bpf_reg_type ptr_type; + if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || @@ -5321,7 +5409,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) continue; } - if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) + ptr_type = env->insn_aux_data[i + delta].ptr_type; + if (ptr_type != ctx_type) continue; ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; @@ -5354,8 +5443,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } target_size = 0; - cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog, - &target_size); + cnt = convert_ctx_access(type, insn, insn_buf, env->prog, + &target_size); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || (ctx_field_size && !target_size)) { verbose(env, "bpf verifier is misconfigured\n"); @@ -5899,7 +5988,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ - ret = convert_ctx_accesses(env); + ret = convert_ctx_accesses(env, env->ops->convert_ctx_access, + PTR_TO_CTX); + + if (ret == 0) + /* Convert *(u32*)(sock_ops + off) accesses */ + ret = convert_ctx_accesses(env, bpf_sock_convert_ctx_access, + PTR_TO_SOCKET); if (ret == 0) ret = fixup_bpf_calls(env); diff --git a/net/core/filter.c b/net/core/filter.c index 8cb242b4400f..23e6e5202138 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5375,23 +5375,29 @@ static bool __sock_filter_check_size(int off, int size, return size == size_default; } -static bool sock_filter_is_valid_access(int off, int size, - enum bpf_access_type type, - const struct bpf_prog *prog, - struct bpf_insn_access_aux *info) +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) { if (off < 0 || off >= sizeof(struct bpf_sock)) return false; if (off % size != 0) return false; - if (!__sock_filter_check_attach_type(off, type, - prog->expected_attach_type)) - return false; if (!__sock_filter_check_size(off, size, info)) return false; return true; } +static bool sock_filter_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (!__sock_filter_check_attach_type(off, type, + prog->expected_attach_type)) + return false; + return bpf_sock_is_valid_access(off, size, type, info); +} + static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog, int drop_verdict) { @@ -6059,10 +6065,10 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, - const struct bpf_insn *si, - struct bpf_insn *insn_buf, - struct bpf_prog *prog, u32 *target_size) +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; int off; @@ -6974,7 +6980,7 @@ const struct bpf_prog_ops lwt_seg6local_prog_ops = { const struct bpf_verifier_ops cg_sock_verifier_ops = { .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, - .convert_ctx_access = sock_filter_convert_ctx_access, + .convert_ctx_access = bpf_sock_convert_ctx_access, }; const struct bpf_prog_ops cg_sock_prog_ops = {
Teach the verifier a little bit about a new type of pointer, a PTR_TO_SOCKET. This pointer type is accessed from BPF through the 'struct bpf_sock' structure. Signed-off-by: Joe Stringer <joe@wand.net.nz> --- include/linux/bpf.h | 17 +++++ include/linux/bpf_verifier.h | 2 + kernel/bpf/verifier.c | 125 ++++++++++++++++++++++++++++++----- net/core/filter.c | 30 +++++---- 4 files changed, 147 insertions(+), 27 deletions(-)