Message ID | 1445468283-4592-1-git-send-email-ast@kernel.org |
---|---|
State | Superseded, archived |
Delegated to: | David Miller |
Headers | show |
After applying this patch I'm unable to use perf passing perf_event again like this: # perf record -a -e evt=cycles -e ./test_config_map.c/maps.pmu_map.event=evt/ --exclude-perf ls With -v it output: ... adding perf_bpf_probe:func_write adding perf_bpf_probe:func_write to 0x367d6a0 add bpf event perf_bpf_probe:func_write_return and attach bpf program 6 adding perf_bpf_probe:func_write_return adding perf_bpf_probe:func_write_return to 0x3a7fc40 mmap size 528384B ERROR: failed to insert value to pmu_map[0] ERROR: Apply config to BPF failed: Invalid option for map, add -v to see detail Opening /sys/kernel/debug/tracing//kprobe_events write= ... Looks like perf sets attr.inherit for cycles? I'll look into this problem. Thank you. On 2015/10/22 6:58, Alexei Starovoitov wrote: > Fix safety checks for bpf_perf_event_read(): > - only non-inherited events can be added to perf_event_array map > (do this check statically at map insertion time) > - dynamically check that event is local and !pmu->count > Otherwise buggy bpf program can cause kernel splat. > > Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") > Signed-off-by: Alexei Starovoitov <ast@kernel.org> > --- > v1->v2: fix compile in case of !CONFIG_PERF_EVENTS > > This patch is on top of > http://patchwork.ozlabs.org/patch/533585/ > to avoid conflicts. > Even in the worst case the crash is not possible. > Only warn_on_once, so imo net-next is ok. 
> > kernel/bpf/arraymap.c | 9 +++++---- > kernel/events/core.c | 16 ++++++++++------ > 2 files changed, 15 insertions(+), 10 deletions(-) > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c > index e3cfe46b074f..75529cc94304 100644 > --- a/kernel/bpf/arraymap.c > +++ b/kernel/bpf/arraymap.c > @@ -294,10 +294,11 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) > if (IS_ERR(attr)) > return (void *)attr; > > - if (attr->type != PERF_TYPE_RAW && > - !(attr->type == PERF_TYPE_SOFTWARE && > - attr->config == PERF_COUNT_SW_BPF_OUTPUT) && > - attr->type != PERF_TYPE_HARDWARE) { > + if ((attr->type != PERF_TYPE_RAW && > + !(attr->type == PERF_TYPE_SOFTWARE && > + attr->config == PERF_COUNT_SW_BPF_OUTPUT) && > + attr->type != PERF_TYPE_HARDWARE) || > + attr->inherit) { > perf_event_release_kernel(event); > return ERR_PTR(-EINVAL); > } > diff --git a/kernel/events/core.c b/kernel/events/core.c > index 64754bfecd70..0b6333265872 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -3258,7 +3258,7 @@ static inline u64 perf_event_count(struct perf_event *event) > u64 perf_event_read_local(struct perf_event *event) > { > unsigned long flags; > - u64 val; > + u64 val = -EINVAL; > > /* > * Disabling interrupts avoids all counter scheduling (context > @@ -3267,12 +3267,14 @@ u64 perf_event_read_local(struct perf_event *event) > local_irq_save(flags); > > /* If this is a per-task event, it must be for current */ > - WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) && > - event->hw.target != current); > + if ((event->attach_state & PERF_ATTACH_TASK) && > + event->hw.target != current) > + goto out; > > /* If this is a per-CPU event, it must be for this CPU */ > - WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) && > - event->cpu != smp_processor_id()); > + if (!(event->attach_state & PERF_ATTACH_TASK) && > + event->cpu != smp_processor_id()) > + goto out; > > /* > * It must not be an event with inherit set, we cannot read 
> @@ -3284,7 +3286,8 @@ u64 perf_event_read_local(struct perf_event *event) > * It must not have a pmu::count method, those are not > * NMI safe. > */ > - WARN_ON_ONCE(event->pmu->count); > + if (event->pmu->count) > + goto out; > > /* > * If the event is currently on this CPU, its either a per-task event, > @@ -3295,6 +3298,7 @@ u64 perf_event_read_local(struct perf_event *event) > event->pmu->read(event); > > val = local64_read(&event->count); > +out: > local_irq_restore(flags); > > return val; -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 10/21/15 9:49 PM, Wangnan (F) wrote: > After applying this patch I'm unable to use perf passing perf_event > again like this: please do not top post and trim your replies. > # perf record -a -e evt=cycles -e > ./test_config_map.c/maps.pmu_map.event=evt/ --exclude-perf ls > > With -v it output: > > ... > adding perf_bpf_probe:func_write > adding perf_bpf_probe:func_write to 0x367d6a0 > add bpf event perf_bpf_probe:func_write_return and attach bpf program 6 > adding perf_bpf_probe:func_write_return > adding perf_bpf_probe:func_write_return to 0x3a7fc40 > mmap size 528384B > ERROR: failed to insert value to pmu_map[0] > ERROR: Apply config to BPF failed: Invalid option for map, add -v to see > detail > Opening /sys/kernel/debug/tracing//kprobe_events write= > ... > > Looks like perf sets attr.inherit for cycles? I'll look into this problem. yes. that's perf default. How did it even work before?! I was testing with your samples/bpf/tracex6 that sets inherit to zero. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/10/22 13:00, Alexei Starovoitov wrote: > On 10/21/15 9:49 PM, Wangnan (F) wrote: >> After applying this patch I'm unable to use perf passing perf_event >> again like this: > > please do not top post and trim your replies. > >> # perf record -a -e evt=cycles -e >> ./test_config_map.c/maps.pmu_map.event=evt/ --exclude-perf ls >> >> With -v it output: >> >> ... >> adding perf_bpf_probe:func_write >> adding perf_bpf_probe:func_write to 0x367d6a0 >> add bpf event perf_bpf_probe:func_write_return and attach bpf program 6 >> adding perf_bpf_probe:func_write_return >> adding perf_bpf_probe:func_write_return to 0x3a7fc40 >> mmap size 528384B >> ERROR: failed to insert value to pmu_map[0] >> ERROR: Apply config to BPF failed: Invalid option for map, add -v to see >> detail >> Opening /sys/kernel/debug/tracing//kprobe_events write= >> ... >> >> Looks like perf sets attr.inherit for cycles? I'll look into this >> problem. > > yes. that's perf default. > How did it even work before?! > I was testing with your samples/bpf/tracex6 that sets inherit to zero. > Tested perf record -i option and it works for me: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -a -e evt=cycles -e ./test_config_map.c/maps.pmu_map.event=evt/ --exclude-perf ls # cat /sys/kernel/debug/tracing/trace | grep ls ls-8227 [001] dN.. 2526.184611: : pmu inc: 82270 ls-8227 [001] dN.. 2526.184626: : pmu inc: 40951 ls-8227 [001] dN.. 2526.184642: : pmu inc: 50659 ls-8227 [001] dN.. 2526.184657: : pmu inc: 43511 ls-8227 [001] dN.. 2526.184675: : pmu inc: 56921 ... And no warning message found in dmesg. So I think your fix is good, we should improve perf. Thank you. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/10/22 6:58, Alexei Starovoitov wrote: > Fix safety checks for bpf_perf_event_read(): > - only non-inherited events can be added to perf_event_array map > (do this check statically at map insertion time) > - dynamically check that event is local and !pmu->count > Otherwise buggy bpf program can cause kernel splat. > > Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") > Signed-off-by: Alexei Starovoitov <ast@kernel.org> > --- > v1->v2: fix compile in case of !CONFIG_PERF_EVENTS > > This patch is on top of > http://patchwork.ozlabs.org/patch/533585/ > to avoid conflicts. > Even in the worst case the crash is not possible. > Only warn_on_once, so imo net-next is ok. > > kernel/bpf/arraymap.c | 9 +++++---- > kernel/events/core.c | 16 ++++++++++------ > 2 files changed, 15 insertions(+), 10 deletions(-) > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c > index e3cfe46b074f..75529cc94304 100644 > --- a/kernel/bpf/arraymap.c > +++ b/kernel/bpf/arraymap.c > @@ -294,10 +294,11 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) > if (IS_ERR(attr)) > return (void *)attr; > > - if (attr->type != PERF_TYPE_RAW && > - !(attr->type == PERF_TYPE_SOFTWARE && > - attr->config == PERF_COUNT_SW_BPF_OUTPUT) && > - attr->type != PERF_TYPE_HARDWARE) { > + if ((attr->type != PERF_TYPE_RAW && > + !(attr->type == PERF_TYPE_SOFTWARE && > + attr->config == PERF_COUNT_SW_BPF_OUTPUT) && > + attr->type != PERF_TYPE_HARDWARE) || > + attr->inherit) { This 'if' statement is so complex. What about using a inline function instead? Thank you. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 10/21/15 10:31 PM, Wangnan (F) wrote: >> + if ((attr->type != PERF_TYPE_RAW && >> + !(attr->type == PERF_TYPE_SOFTWARE && >> + attr->config == PERF_COUNT_SW_BPF_OUTPUT) && >> + attr->type != PERF_TYPE_HARDWARE) || >> + attr->inherit) { > > This 'if' statement is so complex. What about using an inline function > instead? Hmm, I don't see how an inline function will help readability. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/10/22 14:21, Alexei Starovoitov wrote: > On 10/21/15 10:31 PM, Wangnan (F) wrote: >>> + if ((attr->type != PERF_TYPE_RAW && >>> + !(attr->type == PERF_TYPE_SOFTWARE && >>> + attr->config == PERF_COUNT_SW_BPF_OUTPUT) && >>> + attr->type != PERF_TYPE_HARDWARE) || >>> + attr->inherit) { >> >> This 'if' statement is so complex. What about using a inline function >> instead? > > hmm. don't see how inline function will help readability. > For example (not tested): static inline bool perf_event_can_insert_to_map(struct perf_event_attr *attr) { /* is inherit? */ if (attr->inherit) return false; /* is software event? */ if (attr->type == PERF_TYPE_SOFTWARE) if (attr->config == PERF_COUNT_SW_BPF_OUTPUT) return true; else return false; /* Comment... */ if (attr->type == PERF_TYPE_RAW) return true; if (attr->type == PERF_TYPE_HARDWARE) return true; return false; } ... if (!perf_event_can_insert_to_map(attr)) .... Do you think redability is improved? Thank you. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/10/22 6:58, Alexei Starovoitov wrote: [SNIP] > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c > index e3cfe46b074f..75529cc94304 100644 > --- a/kernel/bpf/arraymap.c > +++ b/kernel/bpf/arraymap.c > @@ -294,10 +294,11 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) > if (IS_ERR(attr)) > return (void *)attr; > > - if (attr->type != PERF_TYPE_RAW && > - !(attr->type == PERF_TYPE_SOFTWARE && > - attr->config == PERF_COUNT_SW_BPF_OUTPUT) && > - attr->type != PERF_TYPE_HARDWARE) { > + if ((attr->type != PERF_TYPE_RAW && > + !(attr->type == PERF_TYPE_SOFTWARE && > + attr->config == PERF_COUNT_SW_BPF_OUTPUT) && > + attr->type != PERF_TYPE_HARDWARE) || > + attr->inherit) { > perf_event_release_kernel(event); > return ERR_PTR(-EINVAL); > } I have a question on inherit, not related to this patch: Is it safe for perf to disable attr->inherit if the event is system wide? I haven't read relate code completely. In my current knowledge the behavior of a system wide perf event should be same whether inherit is set or not. Is that true? Thank you. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Oct 22, 2015 at 08:30:36PM +0800, Wangnan (F) wrote: > I have a question on inherit, not related to this patch: > Is it safe for perf to disable attr->inherit if the event is system wide? > I haven't read the related code completely. To my current knowledge, the behavior > of a system-wide perf event should be the same whether inherit is set or not. > Is that true? Yes, .inherit is pointless for CPU-wide events; if we allow creating CPU events with .inherit set, that's unfortunate. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Oct 21, 2015 at 03:58:03PM -0700, Alexei Starovoitov wrote: > diff --git a/kernel/events/core.c b/kernel/events/core.c > index 64754bfecd70..0b6333265872 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -3258,7 +3258,7 @@ static inline u64 perf_event_count(struct perf_event *event) > u64 perf_event_read_local(struct perf_event *event) > { > unsigned long flags; > - u64 val; > + u64 val = -EINVAL; No, you cannot do this, -EINVAL is a valid count value. You simply must not call this function on !local events, ever. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 10/22/15 12:39 AM, Wangnan (F) wrote: > ... > if (!perf_event_can_insert_to_map(attr)) > .... > > Do you think readability is improved? Yes, makes sense. Will respin. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 10/22/15 6:57 AM, Peter Zijlstra wrote: > On Wed, Oct 21, 2015 at 03:58:03PM -0700, Alexei Starovoitov wrote: >> diff --git a/kernel/events/core.c b/kernel/events/core.c >> index 64754bfecd70..0b6333265872 100644 >> --- a/kernel/events/core.c >> +++ b/kernel/events/core.c >> @@ -3258,7 +3258,7 @@ static inline u64 perf_event_count(struct perf_event *event) >> u64 perf_event_read_local(struct perf_event *event) >> { >> unsigned long flags; >> - u64 val; >> + u64 val = -EINVAL; > > No, you cannot do this, -EINVAL is a valid count value. You simply must > not call this function on !local events, ever. agree. Will keep perf_event_read_local() as-is and do all safety checks on bpf side. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index e3cfe46b074f..75529cc94304 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -294,10 +294,11 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) if (IS_ERR(attr)) return (void *)attr; - if (attr->type != PERF_TYPE_RAW && - !(attr->type == PERF_TYPE_SOFTWARE && - attr->config == PERF_COUNT_SW_BPF_OUTPUT) && - attr->type != PERF_TYPE_HARDWARE) { + if ((attr->type != PERF_TYPE_RAW && + !(attr->type == PERF_TYPE_SOFTWARE && + attr->config == PERF_COUNT_SW_BPF_OUTPUT) && + attr->type != PERF_TYPE_HARDWARE) || + attr->inherit) { perf_event_release_kernel(event); return ERR_PTR(-EINVAL); } diff --git a/kernel/events/core.c b/kernel/events/core.c index 64754bfecd70..0b6333265872 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3258,7 +3258,7 @@ static inline u64 perf_event_count(struct perf_event *event) u64 perf_event_read_local(struct perf_event *event) { unsigned long flags; - u64 val; + u64 val = -EINVAL; /* * Disabling interrupts avoids all counter scheduling (context @@ -3267,12 +3267,14 @@ u64 perf_event_read_local(struct perf_event *event) local_irq_save(flags); /* If this is a per-task event, it must be for current */ - WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) && - event->hw.target != current); + if ((event->attach_state & PERF_ATTACH_TASK) && + event->hw.target != current) + goto out; /* If this is a per-CPU event, it must be for this CPU */ - WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) && - event->cpu != smp_processor_id()); + if (!(event->attach_state & PERF_ATTACH_TASK) && + event->cpu != smp_processor_id()) + goto out; /* * It must not be an event with inherit set, we cannot read @@ -3284,7 +3286,8 @@ u64 perf_event_read_local(struct perf_event *event) * It must not have a pmu::count method, those are not * NMI safe. 
*/ - WARN_ON_ONCE(event->pmu->count); + if (event->pmu->count) + goto out; /* * If the event is currently on this CPU, its either a per-task event, @@ -3295,6 +3298,7 @@ u64 perf_event_read_local(struct perf_event *event) event->pmu->read(event); val = local64_read(&event->count); +out: local_irq_restore(flags); return val;
Fix safety checks for bpf_perf_event_read(): - only non-inherited events can be added to perf_event_array map (do this check statically at map insertion time) - dynamically check that event is local and !pmu->count Otherwise buggy bpf program can cause kernel splat. Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") Signed-off-by: Alexei Starovoitov <ast@kernel.org> --- v1->v2: fix compile in case of !CONFIG_PERF_EVENTS This patch is on top of http://patchwork.ozlabs.org/patch/533585/ to avoid conflicts. Even in the worst case the crash is not possible. Only warn_on_once, so imo net-next is ok. kernel/bpf/arraymap.c | 9 +++++---- kernel/events/core.c | 16 ++++++++++------ 2 files changed, 15 insertions(+), 10 deletions(-)