--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -16,6 +16,7 @@
#include <linux/rbtree_latch.h>
#include <linux/numa.h>
#include <linux/wait.h>
+#include <linux/locallock.h>
struct bpf_verifier_env;
struct perf_event;
@@ -467,6 +468,7 @@ _out: \
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
+DECLARE_LOCAL_IRQ_LOCK(bpf_prog_active_lock);
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -668,11 +668,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
* we're calling kfree, otherwise deadlock is possible if kprobes
* are placed somewhere inside of slub
*/
- preempt_disable();
+ local_lock(bpf_prog_active_lock);
__this_cpu_inc(bpf_prog_active);
htab_elem_free(htab, l);
__this_cpu_dec(bpf_prog_active);
- preempt_enable();
+ local_unlock(bpf_prog_active_lock);
}
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -42,6 +42,7 @@
#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
DEFINE_PER_CPU(int, bpf_prog_active);
+DEFINE_LOCAL_IRQ_LOCK(bpf_prog_active_lock);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
@@ -716,7 +717,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto done;
}
- preempt_disable();
+ local_lock(bpf_prog_active_lock);
this_cpu_inc(bpf_prog_active);
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
@@ -750,7 +751,7 @@ static int map_lookup_elem(union bpf_attr *attr)
rcu_read_unlock();
}
this_cpu_dec(bpf_prog_active);
- preempt_enable();
+ local_unlock(bpf_prog_active_lock);
done:
if (err)
@@ -845,7 +846,7 @@ static int map_update_elem(union bpf_attr *attr)
/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
* inside bpf map update or delete otherwise deadlocks are possible
*/
- preempt_disable();
+ local_lock(bpf_prog_active_lock);
__this_cpu_inc(bpf_prog_active);
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
@@ -878,7 +879,7 @@ static int map_update_elem(union bpf_attr *attr)
rcu_read_unlock();
}
__this_cpu_dec(bpf_prog_active);
- preempt_enable();
+ local_unlock(bpf_prog_active_lock);
maybe_wait_bpf_programs(map);
out:
free_value:
@@ -925,13 +926,13 @@ static int map_delete_elem(union bpf_attr *attr)
goto out;
}
- preempt_disable();
+ local_lock(bpf_prog_active_lock);
__this_cpu_inc(bpf_prog_active);
rcu_read_lock();
err = map->ops->map_delete_elem(map, key);
rcu_read_unlock();
__this_cpu_dec(bpf_prog_active);
- preempt_enable();
+ local_unlock(bpf_prog_active_lock);
maybe_wait_bpf_programs(map);
out:
kfree(key);
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8546,7 +8546,7 @@ static void bpf_overflow_handler(struct perf_event *event,
int ret = 0;
ctx.regs = perf_arch_bpf_user_pt_regs(regs);
- preempt_disable();
+ local_lock(bpf_prog_active_lock);
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
goto out;
rcu_read_lock();
@@ -8555,6 +8555,6 @@ static void bpf_overflow_handler(struct perf_event *event,
 out:
 	__this_cpu_dec(bpf_prog_active);
-	preempt_enable();
+	local_unlock(bpf_prog_active_lock);
 
 	if (!ret)
 		return;
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -78,8 +78,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
if (in_nmi()) /* not supported yet */
return 1;
- preempt_disable();
-
+ local_lock(bpf_prog_active_lock);
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
/*
* since some bpf program is already running on this cpu,
@@ -110,7 +109,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
out:
__this_cpu_dec(bpf_prog_active);
- preempt_enable();
+ local_unlock(bpf_prog_active_lock);
return ret;
}
The bpf_prog_active counter is used to avoid recursion on the same CPU.
On RT we can't keep it with the preempt-disable part because the syscall
may need to acquire locks or allocate memory. Use a locallock() to avoid
recursion on the same CPU.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/bpf.h      |  2 ++
 kernel/bpf/hashtab.c     |  4 ++--
 kernel/bpf/syscall.c     | 13 +++++++------
 kernel/events/core.c     |  3 ++-
 kernel/trace/bpf_trace.c |  5 ++---
 5 files changed, 15 insertions(+), 12 deletions(-)