Message ID | 20190410143025.11997-1-bigeasy@linutronix.de |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [1/3] bpf: Use spinlock_t in bpf_lru_list | expand |
On 4/10/19 7:30 AM, Sebastian Andrzej Siewior wrote: > There is no difference between spinlock_t and raw_spinlock_t for !RT > kernels. It is possible that bpf_lru_list_pop_free_to_local() invokes > at least three list walks which look unbounded it probably makes sense > to use spinlock_t. > > Make bpf_lru_list use a spinlock_t. Could you add a cover letter for the patch set since you have more than one patch? > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> > --- > kernel/bpf/bpf_lru_list.c | 38 +++++++++++++++++++------------------- > kernel/bpf/bpf_lru_list.h | 4 ++-- > 2 files changed, 21 insertions(+), 21 deletions(-) > > diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c > index e6ef4401a1380..40f47210c3817 100644 > --- a/kernel/bpf/bpf_lru_list.c > +++ b/kernel/bpf/bpf_lru_list.c > @@ -313,9 +313,9 @@ static void bpf_lru_list_push_free(struct bpf_lru_list *l, > if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) > return; > > - raw_spin_lock_irqsave(&l->lock, flags); > + spin_lock_irqsave(&l->lock, flags); > __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); > - raw_spin_unlock_irqrestore(&l->lock, flags); > + spin_unlock_irqrestore(&l->lock, flags); > } This function (plus many others) is called by bpf program helpers which cannot sleep. Is it possible that under RT spin_lock_irqsave() could sleep and this will make bpf subsystem cannot be used under RT? 
> > static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, > @@ -325,7 +325,7 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, > struct bpf_lru_node *node, *tmp_node; > unsigned int nfree = 0; > > - raw_spin_lock(&l->lock); > + spin_lock(&l->lock); > > __local_list_flush(l, loc_l); > > @@ -344,7 +344,7 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, > local_free_list(loc_l), > BPF_LRU_LOCAL_LIST_T_FREE); > > - raw_spin_unlock(&l->lock); > + spin_unlock(&l->lock); > } > > static void __local_list_add_pending(struct bpf_lru *lru, [...] > static void bpf_lru_list_init(struct bpf_lru_list *l) > @@ -642,7 +642,7 @@ static void bpf_lru_list_init(struct bpf_lru_list *l) > > l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE]; > > - raw_spin_lock_init(&l->lock); > + spin_lock_init(&l->lock); > } > > int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, > diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h > index 7d4f89b7cb841..4e1e4608f1bb0 100644 > --- a/kernel/bpf/bpf_lru_list.h > +++ b/kernel/bpf/bpf_lru_list.h > @@ -36,13 +36,13 @@ struct bpf_lru_list { > /* The next inacitve list rotation starts from here */ > struct list_head *next_inactive_rotation; > > - raw_spinlock_t lock ____cacheline_aligned_in_smp; > + spinlock_t lock ____cacheline_aligned_in_smp; > }; > > struct bpf_lru_locallist { > struct list_head lists[NR_BPF_LRU_LOCAL_LIST_T]; > u16 next_steal; > - raw_spinlock_t lock; > + spinlock_t lock; > }; > > struct bpf_common_lru { >
On 04/10/2019 07:08 PM, Yonghong Song wrote: > On 4/10/19 7:30 AM, Sebastian Andrzej Siewior wrote: >> There is no difference between spinlock_t and raw_spinlock_t for !RT >> kernels. It is possible that bpf_lru_list_pop_free_to_local() invokes >> at least three list walks which look unbounded it probably makes sense >> to use spinlock_t. >> >> Make bpf_lru_list use a spinlock_t. > > Could you add a cover letter for the patch set since you have > more than one patch? > >> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> >> --- >> kernel/bpf/bpf_lru_list.c | 38 +++++++++++++++++++------------------- >> kernel/bpf/bpf_lru_list.h | 4 ++-- >> 2 files changed, 21 insertions(+), 21 deletions(-) >> >> diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c >> index e6ef4401a1380..40f47210c3817 100644 >> --- a/kernel/bpf/bpf_lru_list.c >> +++ b/kernel/bpf/bpf_lru_list.c >> @@ -313,9 +313,9 @@ static void bpf_lru_list_push_free(struct bpf_lru_list *l, >> if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) >> return; >> >> - raw_spin_lock_irqsave(&l->lock, flags); >> + spin_lock_irqsave(&l->lock, flags); >> __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); >> - raw_spin_unlock_irqrestore(&l->lock, flags); >> + spin_unlock_irqrestore(&l->lock, flags); >> } > > This function (plus many others) is called by bpf program helpers which > cannot sleep. Is it possible that under RT spin_lock_irqsave() could > sleep and this will make bpf subsystem cannot be used under RT? Back then in ac00881f9221 ("bpf: convert hashtab lock to raw lock") Yang Shi converted spinlock_t to raw_spinlock_t due to exactly the above mentioned issue. I presume that hasn't changed, right? 
>> static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, >> @@ -325,7 +325,7 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, >> struct bpf_lru_node *node, *tmp_node; >> unsigned int nfree = 0; >> >> - raw_spin_lock(&l->lock); >> + spin_lock(&l->lock); >> >> __local_list_flush(l, loc_l); >> >> @@ -344,7 +344,7 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, >> local_free_list(loc_l), >> BPF_LRU_LOCAL_LIST_T_FREE); >> >> - raw_spin_unlock(&l->lock); >> + spin_unlock(&l->lock); >> } >> >> static void __local_list_add_pending(struct bpf_lru *lru, > [...] >> static void bpf_lru_list_init(struct bpf_lru_list *l) >> @@ -642,7 +642,7 @@ static void bpf_lru_list_init(struct bpf_lru_list *l) >> >> l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE]; >> >> - raw_spin_lock_init(&l->lock); >> + spin_lock_init(&l->lock); >> } >> >> int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, >> diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h >> index 7d4f89b7cb841..4e1e4608f1bb0 100644 >> --- a/kernel/bpf/bpf_lru_list.h >> +++ b/kernel/bpf/bpf_lru_list.h >> @@ -36,13 +36,13 @@ struct bpf_lru_list { >> /* The next inacitve list rotation starts from here */ >> struct list_head *next_inactive_rotation; >> >> - raw_spinlock_t lock ____cacheline_aligned_in_smp; >> + spinlock_t lock ____cacheline_aligned_in_smp; >> }; >> >> struct bpf_lru_locallist { >> struct list_head lists[NR_BPF_LRU_LOCAL_LIST_T]; >> u16 next_steal; >> - raw_spinlock_t lock; >> + spinlock_t lock; >> }; >> >> struct bpf_common_lru { >>
On 2019-04-10 19:19:27 [+0200], Daniel Borkmann wrote: > On 04/10/2019 07:08 PM, Yonghong Song wrote: > > On 4/10/19 7:30 AM, Sebastian Andrzej Siewior wrote: > >> There is no difference between spinlock_t and raw_spinlock_t for !RT > >> kernels. It is possible that bpf_lru_list_pop_free_to_local() invokes > >> at least three list walks which look unbounded it probably makes sense > >> to use spinlock_t. > >> > >> Make bpf_lru_list use a spinlock_t. > > > > Could you add a cover letter for the patch set since you have > > more than one patch? yes. > >> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> > >> --- > >> kernel/bpf/bpf_lru_list.c | 38 +++++++++++++++++++------------------- > >> kernel/bpf/bpf_lru_list.h | 4 ++-- > >> 2 files changed, 21 insertions(+), 21 deletions(-) > >> > >> diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c > >> index e6ef4401a1380..40f47210c3817 100644 > >> --- a/kernel/bpf/bpf_lru_list.c > >> +++ b/kernel/bpf/bpf_lru_list.c > >> @@ -313,9 +313,9 @@ static void bpf_lru_list_push_free(struct bpf_lru_list *l, > >> if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) > >> return; > >> > >> - raw_spin_lock_irqsave(&l->lock, flags); > >> + spin_lock_irqsave(&l->lock, flags); > >> __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); > >> - raw_spin_unlock_irqrestore(&l->lock, flags); > >> + spin_unlock_irqrestore(&l->lock, flags); > >> } > > > > This function (plus many others) is called by bpf program helpers which > > cannot sleep. Is it possible that under RT spin_lock_irqsave() could > > sleep and this will make bpf subsystem cannot be used under RT? > > Back then in ac00881f9221 ("bpf: convert hashtab lock to raw lock") > Yang Shi converted spinlock_t to raw_spinlock_t due to exactly the > above mentioned issue. I presume that hasn't changed, right? Ah. I checked one or two of those and it looked like it was raw since the beginning. 
Anyway, it would be nice to Cc: the RT developer while fiddling with something that only concerns RT. That usage pattern that is mentioned in ac00881f9221, is it true for all data structure algorithms? In bpf_lru_list I was concerned about the list loops. However hashtab and lpm_trie may perform memory allocations while holding the lock and this isn't going to work. Sebastian
On 4/10/19 10:44 AM, Sebastian Andrzej Siewior wrote: > On 2019-04-10 19:19:27 [+0200], Daniel Borkmann wrote: >> On 04/10/2019 07:08 PM, Yonghong Song wrote: >>> On 4/10/19 7:30 AM, Sebastian Andrzej Siewior wrote: >>>> There is no difference between spinlock_t and raw_spinlock_t for !RT >>>> kernels. It is possible that bpf_lru_list_pop_free_to_local() invokes >>>> at least three list walks which look unbounded it probably makes sense >>>> to use spinlock_t. >>>> >>>> Make bpf_lru_list use a spinlock_t. >>> >>> Could you add a cover letter for the patch set since you have >>> more than one patch? > > yes. > >>>> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> >>>> --- >>>> kernel/bpf/bpf_lru_list.c | 38 +++++++++++++++++++------------------- >>>> kernel/bpf/bpf_lru_list.h | 4 ++-- >>>> 2 files changed, 21 insertions(+), 21 deletions(-) >>>> >>>> diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c >>>> index e6ef4401a1380..40f47210c3817 100644 >>>> --- a/kernel/bpf/bpf_lru_list.c >>>> +++ b/kernel/bpf/bpf_lru_list.c >>>> @@ -313,9 +313,9 @@ static void bpf_lru_list_push_free(struct bpf_lru_list *l, >>>> if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) >>>> return; >>>> >>>> - raw_spin_lock_irqsave(&l->lock, flags); >>>> + spin_lock_irqsave(&l->lock, flags); >>>> __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); >>>> - raw_spin_unlock_irqrestore(&l->lock, flags); >>>> + spin_unlock_irqrestore(&l->lock, flags); >>>> } >>> >>> This function (plus many others) is called by bpf program helpers which >>> cannot sleep. Is it possible that under RT spin_lock_irqsave() could >>> sleep and this will make bpf subsystem cannot be used under RT? >> >> Back then in ac00881f9221 ("bpf: convert hashtab lock to raw lock") >> Yang Shi converted spinlock_t to raw_spinlock_t due to exactly the >> above mentioned issue. I presume that hasn't changed, right? > > Ah. I checked one or two of those and it looked like it was raw since > the beginning. 
Anyway, it would be nice to Cc: the RT developer while > fiddling with something that only concerns RT. > > That usage pattern that is mentioned in ac00881f9221, is it true for all > data structure algorithms? In bpf_lru_list I was concerned about the > list loops. For some skewed hash tables with a lot of elements, yes, it is possible time to traverse the loop could be a little bit longer... > However hashtab and lpm_trie may perform memory allocations > while holding the lock and this isn't going to work. The memory allocation in these two places uses GFP_ATOMIC which should not sleep/block. > > Sebastian >
On 04/10/2019 07:44 PM, Sebastian Andrzej Siewior wrote: > On 2019-04-10 19:19:27 [+0200], Daniel Borkmann wrote: >> On 04/10/2019 07:08 PM, Yonghong Song wrote: >>> On 4/10/19 7:30 AM, Sebastian Andrzej Siewior wrote: >>>> There is no difference between spinlock_t and raw_spinlock_t for !RT >>>> kernels. It is possible that bpf_lru_list_pop_free_to_local() invokes >>>> at least three list walks which look unbounded it probably makes sense >>>> to use spinlock_t. >>>> >>>> Make bpf_lru_list use a spinlock_t. >>> >>> Could you add a cover letter for the patch set since you have >>> more than one patch? > > yes. > >>>> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> >>>> --- >>>> kernel/bpf/bpf_lru_list.c | 38 +++++++++++++++++++------------------- >>>> kernel/bpf/bpf_lru_list.h | 4 ++-- >>>> 2 files changed, 21 insertions(+), 21 deletions(-) >>>> >>>> diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c >>>> index e6ef4401a1380..40f47210c3817 100644 >>>> --- a/kernel/bpf/bpf_lru_list.c >>>> +++ b/kernel/bpf/bpf_lru_list.c >>>> @@ -313,9 +313,9 @@ static void bpf_lru_list_push_free(struct bpf_lru_list *l, >>>> if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) >>>> return; >>>> >>>> - raw_spin_lock_irqsave(&l->lock, flags); >>>> + spin_lock_irqsave(&l->lock, flags); >>>> __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); >>>> - raw_spin_unlock_irqrestore(&l->lock, flags); >>>> + spin_unlock_irqrestore(&l->lock, flags); >>>> } >>> >>> This function (plus many others) is called by bpf program helpers which >>> cannot sleep. Is it possible that under RT spin_lock_irqsave() could >>> sleep and this will make bpf subsystem cannot be used under RT? >> >> Back then in ac00881f9221 ("bpf: convert hashtab lock to raw lock") >> Yang Shi converted spinlock_t to raw_spinlock_t due to exactly the >> above mentioned issue. I presume that hasn't changed, right? > > Ah. I checked one or two of those and it looked like it was raw since > the beginning. 
Anyway, it would be nice to Cc: the RT developer while The later ones like form LRU may have just been adapted to use the same after the conversion from ac00881f9221, but I presume there might unfortunately be little to no testing on RT kernels currently. Hopefully we'll get there such that at min bots would yell if something is off so it can be fixed. > fiddling with something that only concerns RT. I think that was the case back then, see discussion / Cc list: https://lore.kernel.org/netdev/1446243386-26582-1-git-send-email-yang.shi@linaro.org/T/ > That usage pattern that is mentioned in ac00881f9221, is it true for all > data structure algorithms? In bpf_lru_list I was concerned about the > list loops. However hashtab and lpm_trie may perform memory allocations > while holding the lock and this isn't going to work. Do you have some document or guide for such typical patterns and how to make them behave better for RT? Thanks, Daniel
On 2019-04-10 18:35:21 [+0000], Yonghong Song wrote: > > Ah. I checked one or two of those and it looked like it was raw since > > the beginning. Anyway, it would be nice to Cc: the RT developer while > > fiddling with something that only concerns RT. > > > > That usage pattern that is mentioned in ac00881f9221, is it true for all > > data structure algorithms? In bpf_lru_list I was concerned about the > > list loops. > For some skewed hash tables with a lot of elements, yes, it is possible > time to traverse the loop could be a little bit longer... okay. So all that adds to your max latency which we try to keep low. If this is debugging only one could argue that it is okay. However if it is intended to be used in production then we usually try to keep as low as possible (i.e. have an upper limit). > > However hashtab and lpm_trie may perform memory allocations > > while holding the lock and this isn't going to work. > > The memory allocation in these two places uses GFP_ATOMIC which should > not sleep/block. That is not true for -RT usage. GFP_ATOMIC enables the usage of emergency memory pools (which are in general precious) and won't schedule() in order to free memory. This doesn't change on -RT. However, SLUB may call into the page allocator in order to get a fresh page and this is not working on -RT. This means you can't allocate memory from a preempt_disable() / local_irq_disable() section which includes your raw_spin_lock(). It works from spin_lock_irq() because that one is a sleeping lock on -RT. Sebastian
On 2019-04-10 21:44:22 [+0200], Daniel Borkmann wrote: > > Ah. I checked one or two of those and it looked like it was raw since > > the beginning. Anyway, it would be nice to Cc: the RT developer while > > The later ones like form LRU may have just been adapted to use the > same after the conversion from ac00881f9221, but I presume there might > unfortunately be little to no testing on RT kernels currently. Hopefully > we'll get there such that at min bots would yell if something is off > so it can be fixed. Thanks. The boot part what made me look at lpm_trie. > > fiddling with something that only concerns RT. > > I think that was the case back then, see discussion / Cc list: > > https://lore.kernel.org/netdev/1446243386-26582-1-git-send-email-yang.shi@linaro.org/T/ So there was a discussion and I somehow missed it. Fair enough. This memory allocation under the lock. Is that new or was it not seen back then? > > That usage pattern that is mentioned in ac00881f9221, is it true for all > > data structure algorithms? In bpf_lru_list I was concerned about the > > list loops. However hashtab and lpm_trie may perform memory allocations > > while holding the lock and this isn't going to work. > > Do you have some document or guide for such typical patterns and how to > make them behave better for RT? Let me put something simple together and once I have the pieces in lockdep I hope that there will be also a document explaining things in more detail. For now: try to keep you preemptible. - spin_lock() -> raw_spin_lock() is correct but raw_spin_lock() -> spin_lock() is not correct. - interrupts handlers run threaded (as with "threadirqs" command line). Most code therefore never really disables interrupts. This includes spin_lock_irq(). Therefore local_irq_disable() + spin_lock() != spin_lock_irq() - preempt_disable(), local_irq_disable(), raw_spin_lock() enables atomic context on -RT which makes scheduling impossible. 
- in atomic context - memory allocations are not possible (including GFP_ATOMIC). - a spin_lock() can not be acquired nor released. - unbounded loops add to task's max latency which should be avoided. - architecture's core runs with disabled interrupts and it is attempted to keep this part short. This includes even hrtimer callbacks which are not invoked with disabled interrupts. - core code uses raw_spin_lock() if it needs to protect something. If you think you need such a lock try to measure the worst case with cyclictest and see how it behaves. If it is visible then a different design should probably be used by shrinking the atomic section in order to become more deterministic. - debugging code often increases latency (lockdep even by few ms). Please document if this introduced unbounded atomic section is intended only for debugging. > Thanks, > Daniel Sebastian
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index e6ef4401a1380..40f47210c3817 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -313,9 +313,9 @@ static void bpf_lru_list_push_free(struct bpf_lru_list *l, if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) return; - raw_spin_lock_irqsave(&l->lock, flags); + spin_lock_irqsave(&l->lock, flags); __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); - raw_spin_unlock_irqrestore(&l->lock, flags); + spin_unlock_irqrestore(&l->lock, flags); } static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, @@ -325,7 +325,7 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, struct bpf_lru_node *node, *tmp_node; unsigned int nfree = 0; - raw_spin_lock(&l->lock); + spin_lock(&l->lock); __local_list_flush(l, loc_l); @@ -344,7 +344,7 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); - raw_spin_unlock(&l->lock); + spin_unlock(&l->lock); } static void __local_list_add_pending(struct bpf_lru *lru, @@ -410,7 +410,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru, l = per_cpu_ptr(lru->percpu_lru, cpu); - raw_spin_lock_irqsave(&l->lock, flags); + spin_lock_irqsave(&l->lock, flags); __bpf_lru_list_rotate(lru, l); @@ -426,7 +426,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru, __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); } - raw_spin_unlock_irqrestore(&l->lock, flags); + spin_unlock_irqrestore(&l->lock, flags); return node; } @@ -443,7 +443,7 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru, loc_l = per_cpu_ptr(clru->local_list, cpu); - raw_spin_lock_irqsave(&loc_l->lock, flags); + spin_lock_irqsave(&loc_l->lock, flags); node = __local_list_pop_free(loc_l); if (!node) { @@ -454,7 +454,7 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru, if (node) __local_list_add_pending(lru, loc_l, cpu, node, 
hash); - raw_spin_unlock_irqrestore(&loc_l->lock, flags); + spin_unlock_irqrestore(&loc_l->lock, flags); if (node) return node; @@ -472,13 +472,13 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru, do { steal_loc_l = per_cpu_ptr(clru->local_list, steal); - raw_spin_lock_irqsave(&steal_loc_l->lock, flags); + spin_lock_irqsave(&steal_loc_l->lock, flags); node = __local_list_pop_free(steal_loc_l); if (!node) node = __local_list_pop_pending(lru, steal_loc_l); - raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags); + spin_unlock_irqrestore(&steal_loc_l->lock, flags); steal = get_next_cpu(steal); } while (!node && steal != first_steal); @@ -486,9 +486,9 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru, loc_l->next_steal = steal; if (node) { - raw_spin_lock_irqsave(&loc_l->lock, flags); + spin_lock_irqsave(&loc_l->lock, flags); __local_list_add_pending(lru, loc_l, cpu, node, hash); - raw_spin_unlock_irqrestore(&loc_l->lock, flags); + spin_unlock_irqrestore(&loc_l->lock, flags); } return node; @@ -516,10 +516,10 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru, loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu); - raw_spin_lock_irqsave(&loc_l->lock, flags); + spin_lock_irqsave(&loc_l->lock, flags); if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) { - raw_spin_unlock_irqrestore(&loc_l->lock, flags); + spin_unlock_irqrestore(&loc_l->lock, flags); goto check_lru_list; } @@ -527,7 +527,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru, node->ref = 0; list_move(&node->list, local_free_list(loc_l)); - raw_spin_unlock_irqrestore(&loc_l->lock, flags); + spin_unlock_irqrestore(&loc_l->lock, flags); return; } @@ -543,11 +543,11 @@ static void bpf_percpu_lru_push_free(struct bpf_lru *lru, l = per_cpu_ptr(lru->percpu_lru, node->cpu); - raw_spin_lock_irqsave(&l->lock, flags); + spin_lock_irqsave(&l->lock, flags); __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); - 
raw_spin_unlock_irqrestore(&l->lock, flags); + spin_unlock_irqrestore(&l->lock, flags); } void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) @@ -627,7 +627,7 @@ static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu) loc_l->next_steal = cpu; - raw_spin_lock_init(&loc_l->lock); + spin_lock_init(&loc_l->lock); } static void bpf_lru_list_init(struct bpf_lru_list *l) @@ -642,7 +642,7 @@ static void bpf_lru_list_init(struct bpf_lru_list *l) l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE]; - raw_spin_lock_init(&l->lock); + spin_lock_init(&l->lock); } int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index 7d4f89b7cb841..4e1e4608f1bb0 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -36,13 +36,13 @@ struct bpf_lru_list { /* The next inacitve list rotation starts from here */ struct list_head *next_inactive_rotation; - raw_spinlock_t lock ____cacheline_aligned_in_smp; + spinlock_t lock ____cacheline_aligned_in_smp; }; struct bpf_lru_locallist { struct list_head lists[NR_BPF_LRU_LOCAL_LIST_T]; u16 next_steal; - raw_spinlock_t lock; + spinlock_t lock; }; struct bpf_common_lru {
There is no difference between spinlock_t and raw_spinlock_t for !RT kernels. It is possible that bpf_lru_list_pop_free_to_local() invokes at least three list walks which look unbounded, so it probably makes sense to use spinlock_t. Make bpf_lru_list use a spinlock_t. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- kernel/bpf/bpf_lru_list.c | 38 +++++++++++++++++++------------------- kernel/bpf/bpf_lru_list.h | 4 ++-- 2 files changed, 21 insertions(+), 21 deletions(-)