Message ID | 1594390953-31757-2-git-send-email-alan.maguire@oracle.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | bpf: fix use of trace_printk() in BPF | expand |
On Fri, Jul 10, 2020 at 7:25 AM Alan Maguire <alan.maguire@oracle.com> wrote: > > The bpf helper bpf_trace_printk() uses trace_printk() under the hood. > This leads to an alarming warning message originating from trace > buffer allocation which occurs the first time a program using > bpf_trace_printk() is loaded. > > We can instead create a trace event for bpf_trace_printk() and enable > it in-kernel when/if we encounter a program using the > bpf_trace_printk() helper. With this approach, trace_printk() > is not used directly and no warning message appears. > > This work was started by Steven (see Link) and finished by Alan; added > Steven's Signed-off-by with his permission. > > Link: https://lore.kernel.org/r/20200628194334.6238b933@oasis.local.home > Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> > Signed-off-by: Alan Maguire <alan.maguire@oracle.com> > --- LGTM. Acked-by: Andrii Nakryiko <andriin@fb.com> > kernel/trace/Makefile | 2 ++ > kernel/trace/bpf_trace.c | 41 ++++++++++++++++++++++++++++++++++++----- > kernel/trace/bpf_trace.h | 34 ++++++++++++++++++++++++++++++++++ > 3 files changed, 72 insertions(+), 5 deletions(-) > create mode 100644 kernel/trace/bpf_trace.h > [...]
On 7/10/20 4:22 PM, Alan Maguire wrote: > The bpf helper bpf_trace_printk() uses trace_printk() under the hood. > This leads to an alarming warning message originating from trace > buffer allocation which occurs the first time a program using > bpf_trace_printk() is loaded. > > We can instead create a trace event for bpf_trace_printk() and enable > it in-kernel when/if we encounter a program using the > bpf_trace_printk() helper. With this approach, trace_printk() > is not used directly and no warning message appears. > > This work was started by Steven (see Link) and finished by Alan; added > Steven's Signed-off-by with his permission. > > Link: https://lore.kernel.org/r/20200628194334.6238b933@oasis.local.home > Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> > Signed-off-by: Alan Maguire <alan.maguire@oracle.com> > --- > kernel/trace/Makefile | 2 ++ > kernel/trace/bpf_trace.c | 41 ++++++++++++++++++++++++++++++++++++----- > kernel/trace/bpf_trace.h | 34 ++++++++++++++++++++++++++++++++++ > 3 files changed, 72 insertions(+), 5 deletions(-) > create mode 100644 kernel/trace/bpf_trace.h > > diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile > index 6575bb0..aeba5ee 100644 > --- a/kernel/trace/Makefile > +++ b/kernel/trace/Makefile > @@ -31,6 +31,8 @@ ifdef CONFIG_GCOV_PROFILE_FTRACE > GCOV_PROFILE := y > endif > > +CFLAGS_bpf_trace.o := -I$(src) > + > CFLAGS_trace_benchmark.o := -I$(src) > CFLAGS_trace_events_filter.o := -I$(src) > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index 1d874d8..1414bf5 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -11,6 +11,7 @@ > #include <linux/uaccess.h> > #include <linux/ctype.h> > #include <linux/kprobes.h> > +#include <linux/spinlock.h> > #include <linux/syscalls.h> > #include <linux/error-injection.h> > > @@ -19,6 +20,9 @@ > #include "trace_probe.h" > #include "trace.h" > > +#define CREATE_TRACE_POINTS > +#include "bpf_trace.h" > + > #define bpf_event_rcu_dereference(p) \ > rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex)) > > @@ -374,6 +378,29 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, > } > } > > +static DEFINE_RAW_SPINLOCK(trace_printk_lock); > + > +#define BPF_TRACE_PRINTK_SIZE 1024 > + > + nit: double newline > +static inline __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) > +{ > + static char buf[BPF_TRACE_PRINTK_SIZE]; > + unsigned long flags; > + va_list ap; > + int ret; > + > + raw_spin_lock_irqsave(&trace_printk_lock, flags); > + va_start(ap, fmt); > + ret = vsnprintf(buf, BPF_TRACE_PRINTK_SIZE, fmt, ap); nit: s/BPF_TRACE_PRINTK_SIZE/sizeof(buf)/ > + va_end(ap); > + if (ret >= 0) > + trace_bpf_trace_printk(buf); Is there a specific reason you added the 'ret >= 0' check on top of [0]? Given the vsnprintf() internals you either return 0 or number of characters generated, no? [0] https://lore.kernel.org/bpf/20200628194334.6238b933@oasis.local.home/ Rest lgtm, thanks! > + raw_spin_unlock_irqrestore(&trace_printk_lock, flags); > + > + return ret; > +} > + > /* > * Only limited trace_printk() conversion specifiers allowed: > * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s > @@ -483,8 +510,7 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, > */ > #define __BPF_TP_EMIT() __BPF_ARG3_TP() > #define __BPF_TP(...) \ > - __trace_printk(0 /* Fake ip */, \ > - fmt, ##__VA_ARGS__) > + bpf_do_trace_printk(fmt, ##__VA_ARGS__) > > #define __BPF_ARG1_TP(...) \ > ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \ > @@ -521,10 +547,15 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, > const struct bpf_func_proto *bpf_get_trace_printk_proto(void) > { > /* > - * this program might be calling bpf_trace_printk, > - * so allocate per-cpu printk buffers > + * This program might be calling bpf_trace_printk, > + * so enable the associated bpf_trace/bpf_trace_printk event. > + * Repeat this each time as it is possible a user has > + * disabled bpf_trace_printk events. By loading a program > + * calling bpf_trace_printk() however the user has expressed > + * the intent to see such events. > */ > - trace_printk_init_buffers(); > + if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) > + pr_warn_ratelimited("could not enable bpf_trace_printk events"); > > return &bpf_trace_printk_proto; > } > diff --git a/kernel/trace/bpf_trace.h b/kernel/trace/bpf_trace.h > new file mode 100644 > index 0000000..9acbc11 > --- /dev/null > +++ b/kernel/trace/bpf_trace.h > @@ -0,0 +1,34 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#undef TRACE_SYSTEM > +#define TRACE_SYSTEM bpf_trace > + > +#if !defined(_TRACE_BPF_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) > + > +#define _TRACE_BPF_TRACE_H > + > +#include <linux/tracepoint.h> > + > +TRACE_EVENT(bpf_trace_printk, > + > + TP_PROTO(const char *bpf_string), > + > + TP_ARGS(bpf_string), > + > + TP_STRUCT__entry( > + __string(bpf_string, bpf_string) > + ), > + > + TP_fast_assign( > + __assign_str(bpf_string, bpf_string); > + ), > + > + TP_printk("%s", __get_str(bpf_string)) > +); > + > +#endif /* _TRACE_BPF_TRACE_H */ > + > +#undef TRACE_INCLUDE_PATH > +#define TRACE_INCLUDE_PATH . > +#define TRACE_INCLUDE_FILE bpf_trace > + > +#include <trace/define_trace.h> >
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 6575bb0..aeba5ee 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -31,6 +31,8 @@ ifdef CONFIG_GCOV_PROFILE_FTRACE GCOV_PROFILE := y endif +CFLAGS_bpf_trace.o := -I$(src) + CFLAGS_trace_benchmark.o := -I$(src) CFLAGS_trace_events_filter.o := -I$(src) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1d874d8..1414bf5 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -11,6 +11,7 @@ #include <linux/uaccess.h> #include <linux/ctype.h> #include <linux/kprobes.h> +#include <linux/spinlock.h> #include <linux/syscalls.h> #include <linux/error-injection.h> @@ -19,6 +20,9 @@ #include "trace_probe.h" #include "trace.h" +#define CREATE_TRACE_POINTS +#include "bpf_trace.h" + #define bpf_event_rcu_dereference(p) \ rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex)) @@ -374,6 +378,29 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, } } +static DEFINE_RAW_SPINLOCK(trace_printk_lock); + +#define BPF_TRACE_PRINTK_SIZE 1024 + + +static inline __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) +{ + static char buf[BPF_TRACE_PRINTK_SIZE]; + unsigned long flags; + va_list ap; + int ret; + + raw_spin_lock_irqsave(&trace_printk_lock, flags); + va_start(ap, fmt); + ret = vsnprintf(buf, BPF_TRACE_PRINTK_SIZE, fmt, ap); + va_end(ap); + if (ret >= 0) + trace_bpf_trace_printk(buf); + raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + + return ret; +} + /* * Only limited trace_printk() conversion specifiers allowed: * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s @@ -483,8 +510,7 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, */ #define __BPF_TP_EMIT() __BPF_ARG3_TP() #define __BPF_TP(...) \ - __trace_printk(0 /* Fake ip */, \ - fmt, ##__VA_ARGS__) + bpf_do_trace_printk(fmt, ##__VA_ARGS__) #define __BPF_ARG1_TP(...) \ ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \ @@ -521,10 +547,15 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, const struct bpf_func_proto *bpf_get_trace_printk_proto(void) { /* - * this program might be calling bpf_trace_printk, - * so allocate per-cpu printk buffers + * This program might be calling bpf_trace_printk, + * so enable the associated bpf_trace/bpf_trace_printk event. + * Repeat this each time as it is possible a user has + * disabled bpf_trace_printk events. By loading a program + * calling bpf_trace_printk() however the user has expressed + * the intent to see such events. */ - trace_printk_init_buffers(); + if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) + pr_warn_ratelimited("could not enable bpf_trace_printk events"); return &bpf_trace_printk_proto; } diff --git a/kernel/trace/bpf_trace.h b/kernel/trace/bpf_trace.h new file mode 100644 index 0000000..9acbc11 --- /dev/null +++ b/kernel/trace/bpf_trace.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf_trace + +#if !defined(_TRACE_BPF_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) + +#define _TRACE_BPF_TRACE_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(bpf_trace_printk, + + TP_PROTO(const char *bpf_string), + + TP_ARGS(bpf_string), + + TP_STRUCT__entry( + __string(bpf_string, bpf_string) + ), + + TP_fast_assign( + __assign_str(bpf_string, bpf_string); + ), + + TP_printk("%s", __get_str(bpf_string)) +); + +#endif /* _TRACE_BPF_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE bpf_trace + +#include <trace/define_trace.h>