diff mbox

hw_breakpoint: Fix Oops at destroying hw_breakpoint event on powerpc

Message ID 1456912517-5711-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com (mailing list archive)
State Accepted
Headers show

Commit Message

Ravi Bangoria March 2, 2016, 9:55 a.m. UTC
At a time of destroying hw_breakpoint event, kernel ends up with Oops.
Here is the sample output from 4.5.0-rc6 kernel.

  [  450.708568] Unable to handle kernel paging request for data at address 0x00000c07
  [  450.708684] Faulting instruction address: 0xc0000000000291d0
  [  450.708750] Oops: Kernel access of bad area, sig: 11 [#1]
  [  450.708798] SMP NR_CPUS=1024 NUMA pSeries
  [  450.708856] Modules linked in: stap_4c2bdcf3e1aee79b646bb9a844e600f7__4962(O) xt_CHECKSUM ...
  [  450.709539] CPU: 5 PID: 5106 Comm: perf_fuzzer Tainted: G           O    4.5.0-rc5+ #1
  [  450.709620] task: c0000000f8795c80 ti: c0000000e3340000 task.ti: c0000000e3340000
  [  450.709691] NIP: c0000000000291d0 LR: c00000000020b6b4 CTR: c00000000020b6f0
  [  450.709760] REGS: c0000000e3343760 TRAP: 0300   Tainted: G           O     (4.5.0-rc5+)
  [  450.709831] MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 22008828  XER: 20000000
  [  450.710001] CFAR: c000000000010708 DAR: 0000000000000c07 DSISR: 42000000 SOFTE: 1
  GPR00: c00000000020b6b4 c0000000e33439e0 c000000001350900 c00000009efa7000
  GPR04: 0000000000000001 c00000009efa7000 0000000000000000 0000000000000001
  GPR08: 0000000000000000 ffffffffffffffff 0000000000000000 0000000000000000
  GPR12: c00000000020b6f0 c000000007e02800 c00000009efa5208 0000000000000000
  GPR16: 0000000000000001 ffffffffffffffff 0000000000000000 c0000000f3ad7f10
  GPR20: c0000000f87964c8 0000000000000001 c0000000f8795c80 fffffffffffffffd
  GPR24: 0000000000000000 c0000000f3ad7f08 c0000000f3ad7f68 c00000009efa6800
  GPR28: c0000000f3ad7f00 c00000009efa5000 c000000001259520 c00000009efa7000
  [  450.710996] NIP [c0000000000291d0] arch_unregister_hw_breakpoint+0x40/0x60
  [  450.711066] LR [c00000000020b6b4] release_bp_slot+0x44/0x80
  [  450.711117] Call Trace:
  [  450.711165] [c0000000e33439e0] [c0000000009c1e38] mutex_lock+0x28/0x70 (unreliable)
  [  450.711257] [c0000000e3343a10] [c00000000020b6b4] release_bp_slot+0x44/0x80
  [  450.711332] [c0000000e3343a40] [c0000000002036c8] _free_event+0xd8/0x350
  [  450.711404] [c0000000e3343a70] [c000000000208260] perf_event_exit_task+0x2b0/0x4c0
  [  450.711490] [c0000000e3343b20] [c0000000000b8ac8] do_exit+0x388/0xc60
  [  450.711563] [c0000000e3343be0] [c0000000000b9484] do_group_exit+0x64/0x100
  [  450.711641] [c0000000e3343c20] [c0000000000c9100] get_signal+0x220/0x770
  [  450.711716] [c0000000e3343d10] [c000000000017884] do_signal+0x54/0x2b0
  [  450.711793] [c0000000e3343e00] [c000000000017cac] do_notify_resume+0xbc/0xd0
  [  450.711865] [c0000000e3343e30] [c000000000009838] ret_from_except_lite+0x64/0x68
  [  450.711948] Instruction dump:
  [  450.711986] f8010010 f821ffd1 7c7f1b78 60000000 60000000 e93f01e8 2fa90000 419e0018
  [  450.712107] e9290098 2fa90000 419e000c 39400000 <f9490c08> 38210030 e8010010 ebe1fff8
  [  450.712230] ---[ end trace 3cf087de955e9358 ]---

Call chain:

  hw_breakpoint_event_init()
    bp->destroy = bp_perf_event_destroy;

  do_exit()
    perf_event_exit_task()
      perf_event_exit_task_context()
        WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
        perf_event_exit_event()
          free_event()
            _free_event()
              bp_perf_event_destroy()//event->destroy(event);
                release_bp_slot()
                  arch_unregister_hw_breakpoint()

perf_event_exit_task_context sets child_ctx->task as TASK_TOMBSTONE
which is (void *)-1. arch_unregister_hw_breakpoint tries to fetch
'thread' attribute of 'task' resulting in Oops.

This patch adds one more condition before accessing data from 'task'.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/hw_breakpoint.c | 3 ++-
 include/linux/perf_event.h          | 2 ++
 kernel/events/core.c                | 2 --
 3 files changed, 4 insertions(+), 3 deletions(-)

Comments

Peter Zijlstra March 2, 2016, 11:44 a.m. UTC | #1
On Wed, Mar 02, 2016 at 03:25:17PM +0530, Ravi Bangoria wrote:
> At a time of destroying hw_breakpoint event, kernel ends up with Oops.
> Here is the sample output from 4.5.0-rc6 kernel.

> Call chain:
> 
>   hw_breakpoint_event_init()
>     bp->destroy = bp_perf_event_destroy;
> 
>   do_exit()
>     perf_event_exit_task()
>       perf_event_exit_task_context()
>         WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
>         perf_event_exit_event()
>           free_event()
>             _free_event()
>               bp_perf_event_destroy()//event->destroy(event);
>                 release_bp_slot()
>                   arch_unregister_hw_breakpoint()
> 
> perf_event_exit_task_context sets child_ctx->task as TASK_TOMBSTONE
> which is (void *)-1. arch_unregister_hw_breakpoint tries to fetch
> 'thread' attribute of 'task' resulting in Oops.
> 
> This patch adds one more condition before accessing data from 'task'.
> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/hw_breakpoint.c | 3 ++-
>  include/linux/perf_event.h          | 2 ++
>  kernel/events/core.c                | 2 --
>  3 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
> index 05e804c..43d8496 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -29,6 +29,7 @@
>  #include <linux/kernel.h>
>  #include <linux/sched.h>
>  #include <linux/smp.h>
> +#include <linux/perf_event.h>
>  
>  #include <asm/hw_breakpoint.h>
>  #include <asm/processor.h>
> @@ -110,7 +111,7 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp)
>  	 * and the single_step_dabr_instruction(), then cleanup the breakpoint
>  	 * restoration variables to prevent dangling pointers.
>  	 */
> -	if (bp->ctx && bp->ctx->task)
> +	if (bp->ctx && bp->ctx->task && bp->ctx->task != TASK_TOMBSTONE)
>  		bp->ctx->task->thread.last_hit_ubp = NULL;
>  }

Yuck.. you should not be touching ctx (esp not this late).

And I suppose you cannot use bp->hw.target either because we don't
actually keep that up-to-date.

Why do you keep per task state anyway? What do per-cpu breakpoints do?
Michael Ellerman March 2, 2016, 11:53 a.m. UTC | #2
Hi Ravi,

On Wed, 2016-02-03 at 09:55:17 UTC, Ravi Bangoria wrote:
> At a time of destroying hw_breakpoint event, kernel ends up with Oops.
> Here is the sample output from 4.5.0-rc6 kernel.

It's nice to trim the oops a bit, the time stamps aren't useful. And the full
GPRs is probably not useful information either.

>   [  450.708568] Unable to handle kernel paging request for data at address 0x00000c07
>   [  450.708684] Faulting instruction address: 0xc0000000000291d0
>   [  450.708750] Oops: Kernel access of bad area, sig: 11 [#1]
>   [  450.708798] SMP NR_CPUS=1024 NUMA pSeries
>   [  450.708856] Modules linked in: stap_4c2bdcf3e1aee79b646bb9a844e600f7__4962(O) xt_CHECKSUM ...
>   [  450.709539] CPU: 5 PID: 5106 Comm: perf_fuzzer Tainted: G           O    4.5.0-rc5+ #1
>   [  450.709620] task: c0000000f8795c80 ti: c0000000e3340000 task.ti: c0000000e3340000
>   [  450.709691] NIP: c0000000000291d0 LR: c00000000020b6b4 CTR: c00000000020b6f0
>   [  450.709760] REGS: c0000000e3343760 TRAP: 0300   Tainted: G           O     (4.5.0-rc5+)
>   [  450.709831] MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 22008828  XER: 20000000
>   [  450.710001] CFAR: c000000000010708 DAR: 0000000000000c07 DSISR: 42000000 SOFTE: 1
>   GPR00: c00000000020b6b4 c0000000e33439e0 c000000001350900 c00000009efa7000
>   GPR04: 0000000000000001 c00000009efa7000 0000000000000000 0000000000000001
>   GPR08: 0000000000000000 ffffffffffffffff 0000000000000000 0000000000000000
>   GPR12: c00000000020b6f0 c000000007e02800 c00000009efa5208 0000000000000000
>   GPR16: 0000000000000001 ffffffffffffffff 0000000000000000 c0000000f3ad7f10
>   GPR20: c0000000f87964c8 0000000000000001 c0000000f8795c80 fffffffffffffffd
>   GPR24: 0000000000000000 c0000000f3ad7f08 c0000000f3ad7f68 c00000009efa6800
>   GPR28: c0000000f3ad7f00 c00000009efa5000 c000000001259520 c00000009efa7000
>   [  450.710996] NIP [c0000000000291d0] arch_unregister_hw_breakpoint+0x40/0x60
>   [  450.711066] LR [c00000000020b6b4] release_bp_slot+0x44/0x80
>   [  450.711117] Call Trace:
>   [  450.711165] [c0000000e33439e0] [c0000000009c1e38] mutex_lock+0x28/0x70 (unreliable)
>   [  450.711257] [c0000000e3343a10] [c00000000020b6b4] release_bp_slot+0x44/0x80
>   [  450.711332] [c0000000e3343a40] [c0000000002036c8] _free_event+0xd8/0x350
>   [  450.711404] [c0000000e3343a70] [c000000000208260] perf_event_exit_task+0x2b0/0x4c0
>   [  450.711490] [c0000000e3343b20] [c0000000000b8ac8] do_exit+0x388/0xc60
>   [  450.711563] [c0000000e3343be0] [c0000000000b9484] do_group_exit+0x64/0x100
>   [  450.711641] [c0000000e3343c20] [c0000000000c9100] get_signal+0x220/0x770
>   [  450.711716] [c0000000e3343d10] [c000000000017884] do_signal+0x54/0x2b0
>   [  450.711793] [c0000000e3343e00] [c000000000017cac] do_notify_resume+0xbc/0xd0
>   [  450.711865] [c0000000e3343e30] [c000000000009838] ret_from_except_lite+0x64/0x68
>   [  450.711948] Instruction dump:
>   [  450.711986] f8010010 f821ffd1 7c7f1b78 60000000 60000000 e93f01e8 2fa90000 419e0018
>   [  450.712107] e9290098 2fa90000 419e000c 39400000 <f9490c08> 38210030 e8010010 ebe1fff8
>   [  450.712230] ---[ end trace 3cf087de955e9358 ]---
> 
> Call chain:
> 
>   hw_breakpoint_event_init()
>     bp->destroy = bp_perf_event_destroy;
> 
>   do_exit()
>     perf_event_exit_task()
>       perf_event_exit_task_context()
>         WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
>         perf_event_exit_event()
>           free_event()
>             _free_event()
>               bp_perf_event_destroy()//event->destroy(event);
>                 release_bp_slot()
>                   arch_unregister_hw_breakpoint()
> 
> perf_event_exit_task_context sets child_ctx->task as TASK_TOMBSTONE
> which is (void *)-1. arch_unregister_hw_breakpoint tries to fetch
> 'thread' attribute of 'task' resulting in Oops.
> 
> This patch adds one more condition before accessing data from 'task'.

I assume this was broken by 63b6da39bb38 ("perf: Fix perf_event_exit_task() race") ?

If so you should say so and add:

  Fixes: 63b6da39bb38 ("perf: Fix perf_event_exit_task() race")

That also means we want it to go into v4.5, which means someone needs to merge
it soon.

Peterz, acme, do you guys want to take this? Or should I?

If the former here's an ack:

Acked-by: Michael Ellerman <mpe@ellerman.id.au>

cheers
Peter Zijlstra March 2, 2016, 11:59 a.m. UTC | #3
On Wed, Mar 02, 2016 at 10:53:24PM +1100, Michael Ellerman wrote:
> Peterz, acme, do you guys want to take this? Or should I?

I'm not too happy its touching event->ctx at all. It really should not
be doing that.
Michael Ellerman March 5, 2016, 10:29 p.m. UTC | #4
On Wed, 2016-02-03 at 09:55:17 UTC, Ravi Bangoria wrote:
> At a time of destroying hw_breakpoint event, kernel ends up with Oops.
> Here is the sample output from 4.5.0-rc6 kernel.

I merged the revised version, as discussed with peterz.

https://git.kernel.org/powerpc/c/fb822e6076d972691c5dd33431

cheers
diff mbox

Patch

diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 05e804c..43d8496 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -29,6 +29,7 @@ 
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
+#include <linux/perf_event.h>
 
 #include <asm/hw_breakpoint.h>
 #include <asm/processor.h>
@@ -110,7 +111,7 @@  void arch_unregister_hw_breakpoint(struct perf_event *bp)
 	 * and the single_step_dabr_instruction(), then cleanup the breakpoint
 	 * restoration variables to prevent dangling pointers.
 	 */
-	if (bp->ctx && bp->ctx->task)
+	if (bp->ctx && bp->ctx->task && bp->ctx->task != TASK_TOMBSTONE)
 		bp->ctx->task->thread.last_hit_ubp = NULL;
 }
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f5c5a3f..491c50e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1192,4 +1192,6 @@  _name##_show(struct device *dev,					\
 									\
 static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
 
+#define TASK_TOMBSTONE ((void *)-1L)
+
 #endif /* _LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6146148..121f30c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -159,8 +159,6 @@  static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
 	raw_spin_unlock(&cpuctx->ctx.lock);
 }
 
-#define TASK_TOMBSTONE ((void *)-1L)
-
 static bool is_kernel_event(struct perf_event *event)
 {
 	return READ_ONCE(event->owner) == TASK_TOMBSTONE;