| Message ID | 20191119193036.92831-4-brianvv@google.com |
|---|---|
| State | Changes Requested |
| Delegated to | BPF Maintainers |
| Series | add bpf batch ops to process more than 1 elem |
On 11/19/19 11:30 AM, Brian Vazquez wrote:
> This commit adds generic support for update and delete batch ops that
> can be used for almost all the bpf maps. These commands share the same
> UAPI attr that lookup and lookup_and_delete batch ops use and the
> syscall commands are:
>
>   BPF_MAP_UPDATE_BATCH
>   BPF_MAP_DELETE_BATCH
>
> The main difference between update/delete and lookup/lookup_and_delete
> batch ops is that for update/delete keys/values must be specified for
> userspace and because of that, neither in_batch nor out_batch are used.
>
> Suggested-by: Stanislav Fomichev <sdf@google.com>
> Signed-off-by: Brian Vazquez <brianvv@google.com>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  include/linux/bpf.h      |  10 ++++
>  include/uapi/linux/bpf.h |   2 +
>  kernel/bpf/syscall.c     | 126 ++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 137 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 767a823dbac74..96a19e1fd2b5b 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -46,6 +46,10 @@ struct bpf_map_ops {
>  	int (*map_lookup_and_delete_batch)(struct bpf_map *map,
>  					   const union bpf_attr *attr,
>  					   union bpf_attr __user *uattr);
> +	int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr,
> +				union bpf_attr __user *uattr);
> +	int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr,
> +				union bpf_attr __user *uattr);
>
>  	/* funcs callable from userspace and from eBPF programs */
>  	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
> @@ -808,6 +812,12 @@ int generic_map_lookup_batch(struct bpf_map *map,
>  int generic_map_lookup_and_delete_batch(struct bpf_map *map,
>  					const union bpf_attr *attr,
>  					union bpf_attr __user *uattr);
> +int generic_map_update_batch(struct bpf_map *map,
> +			     const union bpf_attr *attr,
> +			     union bpf_attr __user *uattr);
> +int generic_map_delete_batch(struct bpf_map *map,
> +			     const union bpf_attr *attr,
> +			     union bpf_attr __user *uattr);
>
>  extern int sysctl_unprivileged_bpf_disabled;
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index e60b7b7cda61a..0f6ff0c4d79dd 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -109,6 +109,8 @@ enum bpf_cmd {
>  	BPF_BTF_GET_NEXT_ID,
>  	BPF_MAP_LOOKUP_BATCH,
>  	BPF_MAP_LOOKUP_AND_DELETE_BATCH,
> +	BPF_MAP_UPDATE_BATCH,
> +	BPF_MAP_DELETE_BATCH,
>  };
>
>  enum bpf_map_type {
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index d0d3d0e0eaca4..06e1bcf40fb8d 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1127,6 +1127,120 @@ static int map_get_next_key(union bpf_attr *attr)
>  	return err;
>  }
>
> +int generic_map_delete_batch(struct bpf_map *map,
> +			     const union bpf_attr *attr,
> +			     union bpf_attr __user *uattr)
> +{
> +	void __user *keys = u64_to_user_ptr(attr->batch.keys);
> +	int ufd = attr->map_fd;
> +	u32 cp, max_count;
> +	struct fd f;
> +	void *key;
> +	int err;
> +
> +	f = fdget(ufd);
> +	if (attr->batch.elem_flags & ~BPF_F_LOCK)
> +		return -EINVAL;
> +
> +	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
> +	    !map_value_has_spin_lock(map)) {
> +		err = -EINVAL;
> +		goto err_put;

Just return -EINVAL?

> +	}
> +
> +	max_count = attr->batch.count;
> +	if (!max_count)
> +		return 0;
> +
> +	err = -ENOMEM;

Why initialize err to -ENOMEM? Maybe just err = 0.

> +	for (cp = 0; cp < max_count; cp++) {
> +		key = __bpf_copy_key(keys + cp * map->key_size, map->key_size);
> +		if (IS_ERR(key)) {
> +			err = PTR_ERR(key);
> +			break;
> +		}
> +
> +		if (err)
> +			break;

The above is incorrect, esp. if you assign err an initial value of -ENOMEM.
The above `if (err) break;` is not really needed. If there is an error,
you already break in the above.
If map->key_size is not 0, the return value 'key' cannot be a NULL pointer.

> +		if (bpf_map_is_dev_bound(map)) {
> +			err = bpf_map_offload_delete_elem(map, key);
> +			break;
> +		}
> +
> +		preempt_disable();
> +		__this_cpu_inc(bpf_prog_active);
> +		rcu_read_lock();
> +		err = map->ops->map_delete_elem(map, key);
> +		rcu_read_unlock();
> +		__this_cpu_dec(bpf_prog_active);
> +		preempt_enable();
> +		maybe_wait_bpf_programs(map);
> +		if (err)
> +			break;
> +	}
> +	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
> +		err = -EFAULT;

If the previous err = -EFAULT, even if copy_to_user() succeeded, the
return value will be -EFAULT, so uattr->batch.count cannot be trusted.
So maybe do
	if (err != -EFAULT && copy_to_user(...))
		err = -EFAULT
?
There are several other places like this.

> +err_put:

You don't need the err_put label in the above.

> +	return err;
> +}
> +int generic_map_update_batch(struct bpf_map *map,
> +			     const union bpf_attr *attr,
> +			     union bpf_attr __user *uattr)
> +{
> +	void __user *values = u64_to_user_ptr(attr->batch.values);
> +	void __user *keys = u64_to_user_ptr(attr->batch.keys);
> +	u32 value_size, cp, max_count;
> +	int ufd = attr->map_fd;
> +	void *key, *value;
> +	struct fd f;
> +	int err;
> +
> +	f = fdget(ufd);
> +	if (attr->batch.elem_flags & ~BPF_F_LOCK)
> +		return -EINVAL;
> +
> +	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
> +	    !map_value_has_spin_lock(map)) {
> +		err = -EINVAL;
> +		goto err_put;

Directly return -EINVAL?

> +	}
> +
> +	value_size = bpf_map_value_size(map);
> +
> +	max_count = attr->batch.count;
> +	if (!max_count)
> +		return 0;
> +
> +	err = -ENOMEM;
> +	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> +	if (!value)
> +		goto err_put;

Directly return -ENOMEM?

> +
> +	for (cp = 0; cp < max_count; cp++) {
> +		key = __bpf_copy_key(keys + cp * map->key_size, map->key_size);

Do you need to free 'key' after its use?

> +		if (IS_ERR(key)) {
> +			err = PTR_ERR(key);
> +			break;
> +		}
> +		err = -EFAULT;
> +		if (copy_from_user(value, values + cp * value_size, value_size))
> +			break;
> +
> +		err = bpf_map_update_value(map, f, key, value,
> +					   attr->batch.elem_flags);
> +
> +		if (err)
> +			break;
> +	}
> +
> +	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
> +		err = -EFAULT;

Similar to the above comment, if err is already -EFAULT, no need
to do copy_to_user().

> +
> +	kfree(value);
> +err_put:

The err_put label is not needed.

> +	return err;
> +}
> +
>  static int __generic_map_lookup_batch(struct bpf_map *map,
>  				      const union bpf_attr *attr,
>  				      union bpf_attr __user *uattr,
> @@ -3117,8 +3231,12 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
>
>  	if (cmd == BPF_MAP_LOOKUP_BATCH)
>  		BPF_DO_BATCH(map->ops->map_lookup_batch);
> -	else
> +	else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
>  		BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
> +	else if (cmd == BPF_MAP_UPDATE_BATCH)
> +		BPF_DO_BATCH(map->ops->map_update_batch);
> +	else
> +		BPF_DO_BATCH(map->ops->map_delete_batch);

Also need to check map_get_sys_perms() permissions for these two new
commands. Both delete and update need FMODE_CAN_WRITE permission.

>
>  err_put:
>  	fdput(f);
> @@ -3229,6 +3347,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
>  		err = bpf_map_do_batch(&attr, uattr,
>  				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
>  		break;
> +	case BPF_MAP_UPDATE_BATCH:
> +		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
> +		break;
> +	case BPF_MAP_DELETE_BATCH:
> +		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
> +		break;
>  	default:
>  		err = -EINVAL;
>  		break;
>
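[Editorial note: for readers following along, below is a minimal sketch, not
part of the patch, of how generic_map_delete_batch() could look with the
structural comments above applied: direct returns instead of the unused
err_put label, the stray "if (err) break;" dropped, and the per-iteration key
freed. Helper names are the ones already used in the quoted patch; the
handling of uattr->batch.count is discussed further down the thread and is
left as in the original here.]

	/* Sketch only: generic_map_delete_batch() with the review comments
	 * folded in. Assumes the same helpers the patch already uses
	 * (__bpf_copy_key(), bpf_map_offload_delete_elem(),
	 * maybe_wait_bpf_programs()).
	 */
	int generic_map_delete_batch(struct bpf_map *map,
				     const union bpf_attr *attr,
				     union bpf_attr __user *uattr)
	{
		void __user *keys = u64_to_user_ptr(attr->batch.keys);
		u32 cp, max_count;
		void *key;
		int err = 0;

		if (attr->batch.elem_flags & ~BPF_F_LOCK)
			return -EINVAL;

		/* Nothing to unwind yet, so return instead of goto err_put. */
		if ((attr->batch.elem_flags & BPF_F_LOCK) &&
		    !map_value_has_spin_lock(map))
			return -EINVAL;

		max_count = attr->batch.count;
		if (!max_count)
			return 0;

		for (cp = 0; cp < max_count; cp++) {
			key = __bpf_copy_key(keys + cp * map->key_size,
					     map->key_size);
			if (IS_ERR(key)) {
				err = PTR_ERR(key);
				break;
			}
			/* The extra "if (err) break;" is gone: any failure
			 * above already breaks out of the loop.
			 */
			if (bpf_map_is_dev_bound(map)) {
				err = bpf_map_offload_delete_elem(map, key);
				kfree(key);
				break;
			}

			preempt_disable();
			__this_cpu_inc(bpf_prog_active);
			rcu_read_lock();
			err = map->ops->map_delete_elem(map, key);
			rcu_read_unlock();
			__this_cpu_dec(bpf_prog_active);
			preempt_enable();
			maybe_wait_bpf_programs(map);
			kfree(key);	/* __bpf_copy_key() allocated it */
			if (err)
				break;
		}

		if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
			err = -EFAULT;
		return err;
	}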
ACK to all the observations, will fix in the next version. There are
just 2 things that might be correct, PTAL.

On Thu, Nov 21, 2019 at 10:00 AM Yonghong Song <yhs@fb.com> wrote:
>
> On 11/19/19 11:30 AM, Brian Vazquez wrote:
> > This commit adds generic support for update and delete batch ops that
> > can be used for almost all the bpf maps. These commands share the same
> > UAPI attr that lookup and lookup_and_delete batch ops use [...]
> >
> > [...]
> >
> > +	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
> > +		err = -EFAULT;
>
> If the previous err = -EFAULT, even if copy_to_user() succeeded, the
> return value will be -EFAULT, so uattr->batch.count cannot be trusted.
> So maybe do
> 	if (err != -EFAULT && copy_to_user(...))
> 		err = -EFAULT
> ?
> There are several other places like this.

I think whatever the err is, cp contains the right amount of entries
correctly updated/deleted and the idea is that you should always try
to copy that value to batch.count, and if that fails, when uattr was
created by libbpf everything was set to 0.

> > [...]
> >
> > +	else if (cmd == BPF_MAP_UPDATE_BATCH)
> > +		BPF_DO_BATCH(map->ops->map_update_batch);
> > +	else
> > +		BPF_DO_BATCH(map->ops->map_delete_batch);
>
> Also need to check map_get_sys_perms() permissions for these two new
> commands. Both delete and update need FMODE_CAN_WRITE permission.

I also got confused for a moment; the check is correct since it is
using '!=' not '==':

    if (cmd != BPF_MAP_LOOKUP_BATCH &&
        !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {

so basically that means that cmd is update, delete or lookup_and_delete,
so we check map_get_sys_perms.

> > [...]
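[Editorial note: to make the point above concrete, this is a paraphrased
sketch, not a quote from this patch, of the guard in bpf_map_do_batch() that
Brian refers to. Because only BPF_MAP_LOOKUP_BATCH is excluded, the new
update and delete batch commands already fall under the FMODE_CAN_WRITE
requirement; the -EPERM return is an assumption about the surrounding error
handling.]

	/* Inside bpf_map_do_batch(), before dispatching the command:
	 * everything except a plain lookup batch must have write access
	 * to the map fd, so UPDATE, DELETE and LOOKUP_AND_DELETE all hit
	 * this check. (Paraphrased sketch; -EPERM path assumed.)
	 */
	if (cmd != BPF_MAP_LOOKUP_BATCH &&
	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}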
On 11/21/19 9:50 PM, Brian Vazquez wrote:
> ACK to all the observations, will fix in the next version. There are
> just 2 things that might be correct, PTAL.
>
> On Thu, Nov 21, 2019 at 10:00 AM Yonghong Song <yhs@fb.com> wrote:
>>
>> On 11/19/19 11:30 AM, Brian Vazquez wrote:
>>> This commit adds generic support for update and delete batch ops that
>>> can be used for almost all the bpf maps. [...]
>>>
>>> [...]
>>>
>>> +	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
>>> +		err = -EFAULT;
>>
>> If the previous err = -EFAULT, even if copy_to_user() succeeded, the
>> return value will be -EFAULT, so uattr->batch.count cannot be trusted.
>> So maybe do
>> 	if (err != -EFAULT && copy_to_user(...))
>> 		err = -EFAULT
>> ?
>> There are several other places like this.
>
> I think whatever the err is, cp contains the right amount of entries
> correctly updated/deleted and the idea is that you should always try
> to copy that value to batch.count, and if that fails, when uattr was
> created by libbpf everything was set to 0.

This is what I mean:

	err = -EFAULT; /* from previous error */
	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;
	return err;

User space will not trust uattr->batch.count even if copy_to_user() is
successful, since -EFAULT is returned. There are two ways to address
this issue if the previous error is -EFAULT:

1. do not copy_to_user() and return -EFAULT, which is what I suggested
   in the above.
2. go ahead and do copy_to_user(), and if it is successful, change the
   return value to something different from -EFAULT to indicate that
   uattr->batch.count is valid.

I feel it is important to return the actual error code -EFAULT to the
user so the user knows some fault happened. Returning another error code
may be misleading during debugging.

>>> [...]
>>>
>>> +	else if (cmd == BPF_MAP_UPDATE_BATCH)
>>> +		BPF_DO_BATCH(map->ops->map_update_batch);
>>> +	else
>>> +		BPF_DO_BATCH(map->ops->map_delete_batch);
>>
>> Also need to check map_get_sys_perms() permissions for these two new
>> commands. Both delete and update need FMODE_CAN_WRITE permission.
>>
> I also got confused for a moment; the check is correct since it is
> using '!=' not '==':
>
>     if (cmd != BPF_MAP_LOOKUP_BATCH &&
>         !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
>
> so basically that means that cmd is update, delete or lookup_and_delete,
> so we check map_get_sys_perms.

I missed this. Thanks for explanation!
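[Editorial note: below is a short sketch of option 1 from the exchange above,
i.e. only reporting the processed count when the error is not already -EFAULT,
so userspace can trust batch.count whenever it is written. This reflects the
reviewer's suggestion, not code from the patch.]

	/* End of the batch loop: cp elements were processed successfully.
	 * Skip the copy when err is already -EFAULT so a successful
	 * copy_to_user() cannot be mistaken for a trustworthy count.
	 */
	if (err != -EFAULT &&
	    copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;

	return err;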
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 767a823dbac74..96a19e1fd2b5b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -46,6 +46,10 @@ struct bpf_map_ops {
 	int (*map_lookup_and_delete_batch)(struct bpf_map *map,
 					   const union bpf_attr *attr,
 					   union bpf_attr __user *uattr);
+	int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr,
+				union bpf_attr __user *uattr);
+	int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr,
+				union bpf_attr __user *uattr);
 
 	/* funcs callable from userspace and from eBPF programs */
 	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
@@ -808,6 +812,12 @@ int generic_map_lookup_batch(struct bpf_map *map,
 int generic_map_lookup_and_delete_batch(struct bpf_map *map,
 					const union bpf_attr *attr,
 					union bpf_attr __user *uattr);
+int generic_map_update_batch(struct bpf_map *map,
+			     const union bpf_attr *attr,
+			     union bpf_attr __user *uattr);
+int generic_map_delete_batch(struct bpf_map *map,
+			     const union bpf_attr *attr,
+			     union bpf_attr __user *uattr);
 
 extern int sysctl_unprivileged_bpf_disabled;
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e60b7b7cda61a..0f6ff0c4d79dd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -109,6 +109,8 @@ enum bpf_cmd {
 	BPF_BTF_GET_NEXT_ID,
 	BPF_MAP_LOOKUP_BATCH,
 	BPF_MAP_LOOKUP_AND_DELETE_BATCH,
+	BPF_MAP_UPDATE_BATCH,
+	BPF_MAP_DELETE_BATCH,
 };
 
 enum bpf_map_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d0d3d0e0eaca4..06e1bcf40fb8d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1127,6 +1127,120 @@ static int map_get_next_key(union bpf_attr *attr)
 	return err;
 }
 
+int generic_map_delete_batch(struct bpf_map *map,
+			     const union bpf_attr *attr,
+			     union bpf_attr __user *uattr)
+{
+	void __user *keys = u64_to_user_ptr(attr->batch.keys);
+	int ufd = attr->map_fd;
+	u32 cp, max_count;
+	struct fd f;
+	void *key;
+	int err;
+
+	f = fdget(ufd);
+	if (attr->batch.elem_flags & ~BPF_F_LOCK)
+		return -EINVAL;
+
+	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
+	max_count = attr->batch.count;
+	if (!max_count)
+		return 0;
+
+	err = -ENOMEM;
+	for (cp = 0; cp < max_count; cp++) {
+		key = __bpf_copy_key(keys + cp * map->key_size, map->key_size);
+		if (IS_ERR(key)) {
+			err = PTR_ERR(key);
+			break;
+		}
+
+		if (err)
+			break;
+		if (bpf_map_is_dev_bound(map)) {
+			err = bpf_map_offload_delete_elem(map, key);
+			break;
+		}
+
+		preempt_disable();
+		__this_cpu_inc(bpf_prog_active);
+		rcu_read_lock();
+		err = map->ops->map_delete_elem(map, key);
+		rcu_read_unlock();
+		__this_cpu_dec(bpf_prog_active);
+		preempt_enable();
+		maybe_wait_bpf_programs(map);
+		if (err)
+			break;
+	}
+	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+		err = -EFAULT;
+err_put:
+	return err;
+}
+int generic_map_update_batch(struct bpf_map *map,
+			     const union bpf_attr *attr,
+			     union bpf_attr __user *uattr)
+{
+	void __user *values = u64_to_user_ptr(attr->batch.values);
+	void __user *keys = u64_to_user_ptr(attr->batch.keys);
+	u32 value_size, cp, max_count;
+	int ufd = attr->map_fd;
+	void *key, *value;
+	struct fd f;
+	int err;
+
+	f = fdget(ufd);
+	if (attr->batch.elem_flags & ~BPF_F_LOCK)
+		return -EINVAL;
+
+	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
+	value_size = bpf_map_value_size(map);
+
+	max_count = attr->batch.count;
+	if (!max_count)
+		return 0;
+
+	err = -ENOMEM;
+	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+	if (!value)
+		goto err_put;
+
+	for (cp = 0; cp < max_count; cp++) {
+		key = __bpf_copy_key(keys + cp * map->key_size, map->key_size);
+		if (IS_ERR(key)) {
+			err = PTR_ERR(key);
+			break;
+		}
+		err = -EFAULT;
+		if (copy_from_user(value, values + cp * value_size, value_size))
+			break;
+
+		err = bpf_map_update_value(map, f, key, value,
+					   attr->batch.elem_flags);
+
+		if (err)
+			break;
+	}
+
+	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+		err = -EFAULT;
+
+	kfree(value);
+err_put:
+	return err;
+}
+
 static int __generic_map_lookup_batch(struct bpf_map *map,
 				      const union bpf_attr *attr,
 				      union bpf_attr __user *uattr,
@@ -3117,8 +3231,12 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
 
 	if (cmd == BPF_MAP_LOOKUP_BATCH)
 		BPF_DO_BATCH(map->ops->map_lookup_batch);
-	else
+	else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
 		BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
+	else if (cmd == BPF_MAP_UPDATE_BATCH)
+		BPF_DO_BATCH(map->ops->map_update_batch);
+	else
+		BPF_DO_BATCH(map->ops->map_delete_batch);
 
 err_put:
 	fdput(f);
@@ -3229,6 +3347,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = bpf_map_do_batch(&attr, uattr,
 				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
 		break;
+	case BPF_MAP_UPDATE_BATCH:
+		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
+		break;
+	case BPF_MAP_DELETE_BATCH:
+		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
+		break;
 	default:
 		err = -EINVAL;
 		break;
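
[Editorial note: for context, a rough userspace sketch of how the two new
commands would be driven through the raw bpf(2) syscall. The batch field
names (keys, values, count, elem_flags, map_fd) are taken from what the
kernel code above reads; the exact union bpf_attr layout comes from the UAPI
change earlier in this series (in particular which map_fd field the batch
struct exposes is assumed here), and the libbpf wrappers added later in the
series would normally be used instead. Treat this as illustrative pseudo-usage
rather than a drop-in program.]

	#include <linux/bpf.h>
	#include <stdint.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
	{
		return syscall(__NR_bpf, cmd, attr, size);
	}

	/* Update 'count' key/value pairs in one syscall; on return, count
	 * holds the number of elements the kernel actually processed
	 * (written back through uattr->batch.count as in the patch above).
	 */
	static long map_update_batch(int map_fd, void *keys, void *values,
				     uint32_t *count, uint64_t elem_flags)
	{
		union bpf_attr attr;
		long err;

		memset(&attr, 0, sizeof(attr));
		attr.batch.map_fd = map_fd;	/* field name assumed, see note */
		attr.batch.keys = (uint64_t)(unsigned long)keys;
		attr.batch.values = (uint64_t)(unsigned long)values;
		attr.batch.count = *count;
		attr.batch.elem_flags = elem_flags; /* 0, or BPF_F_LOCK */

		err = sys_bpf(BPF_MAP_UPDATE_BATCH, &attr, sizeof(attr));
		*count = attr.batch.count;
		return err;
	}

	/* Delete 'count' keys in one syscall; values are not used. */
	static long map_delete_batch(int map_fd, void *keys, uint32_t *count)
	{
		union bpf_attr attr;
		long err;

		memset(&attr, 0, sizeof(attr));
		attr.batch.map_fd = map_fd;	/* field name assumed, see note */
		attr.batch.keys = (uint64_t)(unsigned long)keys;
		attr.batch.count = *count;

		err = sys_bpf(BPF_MAP_DELETE_BATCH, &attr, sizeof(attr));
		*count = attr.batch.count;
		return err;
	}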