Message ID | 20230307143558.294354-2-vschneid@redhat.com |
---|---|
State | New |
Headers | show |
Series | Generic IPI sending tracepoint | expand |
On Tue, Mar 07, 2023 at 02:35:52PM +0000, Valentin Schneider wrote: > trace_ipi_raise() is unsuitable for generically tracing IPI sources due to > its "reason" argument being an uninformative string (on arm64 all you get > is "Function call interrupts" for SMP calls). > > Add a variant of it that exports a target cpumask, a callsite and a callback. > > Signed-off-by: Valentin Schneider <vschneid@redhat.com> > Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org> > --- > include/trace/events/ipi.h | 22 ++++++++++++++++++++++ > 1 file changed, 22 insertions(+) > > diff --git a/include/trace/events/ipi.h b/include/trace/events/ipi.h > index 0be71dad6ec03..b1125dc27682c 100644 > --- a/include/trace/events/ipi.h > +++ b/include/trace/events/ipi.h > @@ -35,6 +35,28 @@ TRACE_EVENT(ipi_raise, > TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) > ); > > +TRACE_EVENT(ipi_send_cpumask, > + > + TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback), > + > + TP_ARGS(cpumask, callsite, callback), > + > + TP_STRUCT__entry( > + __cpumask(cpumask) > + __field(void *, callsite) > + __field(void *, callback) > + ), > + > + TP_fast_assign( > + __assign_cpumask(cpumask, cpumask_bits(cpumask)); > + __entry->callsite = (void *)callsite; > + __entry->callback = callback; > + ), > + > + TP_printk("cpumask=%s callsite=%pS callback=%pS", > + __get_cpumask(cpumask), __entry->callsite, __entry->callback) > +); Would it make sense to add a variant like: ipi_send_cpu() that records a single cpu instead of a cpumask? A lot of sites seem to do: cpumask_of(cpu) for that first argument, and it seems to me it is quite daft to have to memcpy a full multi-word cpumask in those cases. Remember, nr_possible_cpus > 64 is quite common these days.
On Wed, Mar 22, 2023 at 10:39:55AM +0100, Peter Zijlstra wrote: > On Tue, Mar 07, 2023 at 02:35:52PM +0000, Valentin Schneider wrote: > > trace_ipi_raise() is unsuitable for generically tracing IPI sources due to > > its "reason" argument being an uninformative string (on arm64 all you get > > is "Function call interrupts" for SMP calls). > > > > Add a variant of it that exports a target cpumask, a callsite and a callback. > > > > Signed-off-by: Valentin Schneider <vschneid@redhat.com> > > Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org> > > --- > > include/trace/events/ipi.h | 22 ++++++++++++++++++++++ > > 1 file changed, 22 insertions(+) > > > > diff --git a/include/trace/events/ipi.h b/include/trace/events/ipi.h > > index 0be71dad6ec03..b1125dc27682c 100644 > > --- a/include/trace/events/ipi.h > > +++ b/include/trace/events/ipi.h > > @@ -35,6 +35,28 @@ TRACE_EVENT(ipi_raise, > > TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) > > ); > > > > +TRACE_EVENT(ipi_send_cpumask, > > + > > + TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback), > > + > > + TP_ARGS(cpumask, callsite, callback), > > + > > + TP_STRUCT__entry( > > + __cpumask(cpumask) > > + __field(void *, callsite) > > + __field(void *, callback) > > + ), > > + > > + TP_fast_assign( > > + __assign_cpumask(cpumask, cpumask_bits(cpumask)); > > + __entry->callsite = (void *)callsite; > > + __entry->callback = callback; > > + ), > > + > > + TP_printk("cpumask=%s callsite=%pS callback=%pS", > > + __get_cpumask(cpumask), __entry->callsite, __entry->callback) > > +); > > Would it make sense to add a variant like: ipi_send_cpu() that records a > single cpu instead of a cpumask. A lot of sites seems to do: > cpumask_of(cpu) for that first argument, and it seems to me it is quite > daft to have to memcpy a full multi-word cpumask in those cases. > > Remember, nr_possible_cpus > 64 is quite common these days. Something a little bit like so... 
--- Subject: trace: Add trace_ipi_send_cpu() From: Peter Zijlstra <peterz@infradead.org> Date: Wed Mar 22 11:28:36 CET 2023 Because copying cpumasks around when targeting a single CPU is a bit daft... Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> --- include/linux/smp.h | 6 +++--- include/trace/events/ipi.h | 22 ++++++++++++++++++++++ kernel/irq_work.c | 6 ++---- kernel/smp.c | 4 ++-- 4 files changed, 29 insertions(+), 9 deletions(-) --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -130,9 +130,9 @@ extern void arch_smp_send_reschedule(int * scheduler_ipi() is inline so can't be passed as callback reason, but the * callsite IP should be sufficient for root-causing IPIs sent from here. */ -#define smp_send_reschedule(cpu) ({ \ - trace_ipi_send_cpumask(cpumask_of(cpu), _RET_IP_, NULL); \ - arch_smp_send_reschedule(cpu); \ +#define smp_send_reschedule(cpu) ({ \ + trace_ipi_send_cpu(cpu, _RET_IP_, NULL); \ + arch_smp_send_reschedule(cpu); \ }) /* --- a/include/trace/events/ipi.h +++ b/include/trace/events/ipi.h @@ -35,6 +35,28 @@ TRACE_EVENT(ipi_raise, TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) ); +TRACE_EVENT(ipi_send_cpu, + + TP_PROTO(const unsigned int cpu, unsigned long callsite, void *callback), + + TP_ARGS(cpu, callsite, callback), + + TP_STRUCT__entry( + __field(unsigned int, cpu) + __field(void *, callsite) + __field(void *, callback) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->callsite = (void *)callsite; + __entry->callback = callback; + ), + + TP_printk("cpu=%s callsite=%pS callback=%pS", + __entry->cpu, __entry->callsite, __entry->callback) +); + TRACE_EVENT(ipi_send_cpumask, TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback), --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -78,10 +78,8 @@ void __weak arch_irq_work_raise(void) static __always_inline void irq_work_raise(struct irq_work *work) { - if (trace_ipi_send_cpumask_enabled() && 
arch_irq_work_has_interrupt()) - trace_ipi_send_cpumask(cpumask_of(smp_processor_id()), - _RET_IP_, - work->func); + if (trace_ipi_send_cpu_enabled() && arch_irq_work_has_interrupt()) + trace_ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func); arch_irq_work_raise(); } --- a/kernel/smp.c +++ b/kernel/smp.c @@ -109,7 +109,7 @@ static __always_inline void send_call_function_single_ipi(int cpu, smp_call_func_t func) { if (call_function_single_prep_ipi(cpu)) { - trace_ipi_send_cpumask(cpumask_of(cpu), _RET_IP_, func); + trace_ipi_send_cpu(cpu, _RET_IP_, func); arch_send_call_function_single_ipi(cpu); } } @@ -348,7 +348,7 @@ void __smp_call_single_queue(int cpu, st * even if we haven't sent the smp_call IPI yet (e.g. the stopper * executes migration_cpu_stop() on the remote CPU). */ - if (trace_ipi_send_cpumask_enabled()) { + if (trace_ipi_send_cpu_enabled()) { call_single_data_t *csd; smp_call_func_t func;
On 22/03/23 11:30, Peter Zijlstra wrote: > On Wed, Mar 22, 2023 at 10:39:55AM +0100, Peter Zijlstra wrote: >> On Tue, Mar 07, 2023 at 02:35:52PM +0000, Valentin Schneider wrote: >> > +TRACE_EVENT(ipi_send_cpumask, >> > + >> > + TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback), >> > + >> > + TP_ARGS(cpumask, callsite, callback), >> > + >> > + TP_STRUCT__entry( >> > + __cpumask(cpumask) >> > + __field(void *, callsite) >> > + __field(void *, callback) >> > + ), >> > + >> > + TP_fast_assign( >> > + __assign_cpumask(cpumask, cpumask_bits(cpumask)); >> > + __entry->callsite = (void *)callsite; >> > + __entry->callback = callback; >> > + ), >> > + >> > + TP_printk("cpumask=%s callsite=%pS callback=%pS", >> > + __get_cpumask(cpumask), __entry->callsite, __entry->callback) >> > +); >> >> Would it make sense to add a variant like: ipi_send_cpu() that records a >> single cpu instead of a cpumask. A lot of sites seems to do: >> cpumask_of(cpu) for that first argument, and it seems to me it is quite >> daft to have to memcpy a full multi-word cpumask in those cases. >> >> Remember, nr_possible_cpus > 64 is quite common these days. > > Something we litte bit like so... > I was wondering whether we could stick with a single trace event, but let ftrace be aware of weight=1 vs weight>1 cpumasks. For weight>1, it would memcpy() as usual, for weight=1, it could write a pointer to a cpu_bit_bitmap[] equivalent embedded in the trace itself. Unfortunately, Ftrace bitmasks are represented as a u32 made of two 16 bit values: [offset in event record, size], so there isn't a straightforward way to point to a "reusable" cpumask. AFAICT the only alternative would be to do that via a different trace event, but then we should just go with a plain old uint - i.e. 
do what you're doing here, so: Tested-and-reviewed-by: Valentin Schneider <vschneid@redhat.com> (with the tiny typo fix below) > @@ -35,6 +35,28 @@ TRACE_EVENT(ipi_raise, > TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) > ); > > +TRACE_EVENT(ipi_send_cpu, > + > + TP_PROTO(const unsigned int cpu, unsigned long callsite, void *callback), > + > + TP_ARGS(cpu, callsite, callback), > + > + TP_STRUCT__entry( > + __field(unsigned int, cpu) > + __field(void *, callsite) > + __field(void *, callback) > + ), > + > + TP_fast_assign( > + __entry->cpu = cpu; > + __entry->callsite = (void *)callsite; > + __entry->callback = callback; > + ), > + > + TP_printk("cpu=%s callsite=%pS callback=%pS", ^ s/s/u/ > + __entry->cpu, __entry->callsite, __entry->callback) > +); > +
diff --git a/include/trace/events/ipi.h b/include/trace/events/ipi.h index 0be71dad6ec03..b1125dc27682c 100644 --- a/include/trace/events/ipi.h +++ b/include/trace/events/ipi.h @@ -35,6 +35,28 @@ TRACE_EVENT(ipi_raise, TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) ); +TRACE_EVENT(ipi_send_cpumask, + + TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback), + + TP_ARGS(cpumask, callsite, callback), + + TP_STRUCT__entry( + __cpumask(cpumask) + __field(void *, callsite) + __field(void *, callback) + ), + + TP_fast_assign( + __assign_cpumask(cpumask, cpumask_bits(cpumask)); + __entry->callsite = (void *)callsite; + __entry->callback = callback; + ), + + TP_printk("cpumask=%s callsite=%pS callback=%pS", + __get_cpumask(cpumask), __entry->callsite, __entry->callback) +); + DECLARE_EVENT_CLASS(ipi_handler, TP_PROTO(const char *reason),