diff mbox series

[07/14] powerpc: Add support for restartable sequences

Message ID 20180430224433.17407-8-mathieu.desnoyers@efficios.com (mailing list archive)
State Not Applicable
Headers show
Series None | expand

Commit Message

Mathieu Desnoyers April 30, 2018, 10:44 p.m. UTC
From: Boqun Feng <boqun.feng@gmail.com>

Call the rseq_handle_notify_resume() function on return to userspace if
TIF_NOTIFY_RESUME thread flag is set.

Perform fixup on the pre-signal frame when a signal is delivered on top of a
restartable sequence critical section.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Michael Ellerman <mpe@ellerman.id.au>
CC: Peter Zijlstra <peterz@infradead.org>
CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
CC: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/Kconfig         | 1 +
 arch/powerpc/kernel/signal.c | 3 +++
 2 files changed, 4 insertions(+)

Comments

Peter Zijlstra May 16, 2018, 4:18 p.m. UTC | #1
On Mon, Apr 30, 2018 at 06:44:26PM -0400, Mathieu Desnoyers wrote:
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index c32a181a7cbb..ed21a777e8c6 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -223,6 +223,7 @@ config PPC
>  	select HAVE_SYSCALL_TRACEPOINTS
>  	select HAVE_VIRT_CPU_ACCOUNTING
>  	select HAVE_IRQ_TIME_ACCOUNTING
> +	select HAVE_RSEQ
>  	select IRQ_DOMAIN
>  	select IRQ_FORCED_THREADING
>  	select MODULES_USE_ELF_RELA
> diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> index 61db86ecd318..d3bb3aaaf5ac 100644
> --- a/arch/powerpc/kernel/signal.c
> +++ b/arch/powerpc/kernel/signal.c
> @@ -133,6 +133,8 @@ static void do_signal(struct task_struct *tsk)
>  	/* Re-enable the breakpoints for the signal stack */
>  	thread_change_pc(tsk, tsk->thread.regs);
>  
> +	rseq_signal_deliver(tsk->thread.regs);
> +
>  	if (is32) {
>          	if (ksig.ka.sa.sa_flags & SA_SIGINFO)
>  			ret = handle_rt_signal32(&ksig, oldset, tsk);
> @@ -164,6 +166,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
>  	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
>  		clear_thread_flag(TIF_NOTIFY_RESUME);
>  		tracehook_notify_resume(regs);
> +		rseq_handle_notify_resume(regs);
>  	}
>  
>  	user_enter();

Again no rseq_syscall().
Mathieu Desnoyers May 16, 2018, 8:13 p.m. UTC | #2
----- On May 16, 2018, at 12:18 PM, Peter Zijlstra peterz@infradead.org wrote:

> On Mon, Apr 30, 2018 at 06:44:26PM -0400, Mathieu Desnoyers wrote:
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index c32a181a7cbb..ed21a777e8c6 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -223,6 +223,7 @@ config PPC
>>  	select HAVE_SYSCALL_TRACEPOINTS
>>  	select HAVE_VIRT_CPU_ACCOUNTING
>>  	select HAVE_IRQ_TIME_ACCOUNTING
>> +	select HAVE_RSEQ
>>  	select IRQ_DOMAIN
>>  	select IRQ_FORCED_THREADING
>>  	select MODULES_USE_ELF_RELA
>> diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
>> index 61db86ecd318..d3bb3aaaf5ac 100644
>> --- a/arch/powerpc/kernel/signal.c
>> +++ b/arch/powerpc/kernel/signal.c
>> @@ -133,6 +133,8 @@ static void do_signal(struct task_struct *tsk)
>>  	/* Re-enable the breakpoints for the signal stack */
>>  	thread_change_pc(tsk, tsk->thread.regs);
>>  
>> +	rseq_signal_deliver(tsk->thread.regs);
>> +
>>  	if (is32) {
>>          	if (ksig.ka.sa.sa_flags & SA_SIGINFO)
>>  			ret = handle_rt_signal32(&ksig, oldset, tsk);
>> @@ -164,6 +166,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long
>> thread_info_flags)
>>  	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
>>  		clear_thread_flag(TIF_NOTIFY_RESUME);
>>  		tracehook_notify_resume(regs);
>> +		rseq_handle_notify_resume(regs);
>>  	}
>>  
>>  	user_enter();
> 
> Again no rseq_syscall().

Same question for PowerPC as for ARM:

Considering that rseq_syscall is implemented as follows:

+void rseq_syscall(struct pt_regs *regs)
+{
+       unsigned long ip = instruction_pointer(regs);
+       struct task_struct *t = current;
+       struct rseq_cs rseq_cs;
+
+       if (!t->rseq)
+               return;
+       if (!access_ok(VERIFY_READ, t->rseq, sizeof(*t->rseq)) ||
+           rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
+               force_sig(SIGSEGV, t);
+}

and that x86 calls it from syscall_return_slowpath() (which AFAIU is
now used in the fast-path since KPTI), I wonder where we should call
this on PowerPC ?  I was under the impression that PowerPC return to
userspace fast-path was not calling C code unless work flags were set,
but I might be wrong.

Thoughts ?

Thanks!

Mathieu
Boqun Feng May 17, 2018, 1:19 a.m. UTC | #3
On Wed, May 16, 2018 at 04:13:16PM -0400, Mathieu Desnoyers wrote:
> ----- On May 16, 2018, at 12:18 PM, Peter Zijlstra peterz@infradead.org wrote:
> 
> > On Mon, Apr 30, 2018 at 06:44:26PM -0400, Mathieu Desnoyers wrote:
> >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> >> index c32a181a7cbb..ed21a777e8c6 100644
> >> --- a/arch/powerpc/Kconfig
> >> +++ b/arch/powerpc/Kconfig
> >> @@ -223,6 +223,7 @@ config PPC
> >>  	select HAVE_SYSCALL_TRACEPOINTS
> >>  	select HAVE_VIRT_CPU_ACCOUNTING
> >>  	select HAVE_IRQ_TIME_ACCOUNTING
> >> +	select HAVE_RSEQ
> >>  	select IRQ_DOMAIN
> >>  	select IRQ_FORCED_THREADING
> >>  	select MODULES_USE_ELF_RELA
> >> diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> >> index 61db86ecd318..d3bb3aaaf5ac 100644
> >> --- a/arch/powerpc/kernel/signal.c
> >> +++ b/arch/powerpc/kernel/signal.c
> >> @@ -133,6 +133,8 @@ static void do_signal(struct task_struct *tsk)
> >>  	/* Re-enable the breakpoints for the signal stack */
> >>  	thread_change_pc(tsk, tsk->thread.regs);
> >>  
> >> +	rseq_signal_deliver(tsk->thread.regs);
> >> +
> >>  	if (is32) {
> >>          	if (ksig.ka.sa.sa_flags & SA_SIGINFO)
> >>  			ret = handle_rt_signal32(&ksig, oldset, tsk);
> >> @@ -164,6 +166,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long
> >> thread_info_flags)
> >>  	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
> >>  		clear_thread_flag(TIF_NOTIFY_RESUME);
> >>  		tracehook_notify_resume(regs);
> >> +		rseq_handle_notify_resume(regs);
> >>  	}
> >>  
> >>  	user_enter();
> > 
> > Again no rseq_syscall().
> 
> Same question for PowerPC as for ARM:
> 
> Considering that rseq_syscall is implemented as follows:
> 
> +void rseq_syscall(struct pt_regs *regs)
> +{
> +       unsigned long ip = instruction_pointer(regs);
> +       struct task_struct *t = current;
> +       struct rseq_cs rseq_cs;
> +
> +       if (!t->rseq)
> +               return;
> +       if (!access_ok(VERIFY_READ, t->rseq, sizeof(*t->rseq)) ||
> +           rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
> +               force_sig(SIGSEGV, t);
> +}
> 
> and that x86 calls it from syscall_return_slowpath() (which AFAIU is
> now used in the fast-path since KPTI), I wonder where we should call

So we actually detect this after the syscall takes effect, right? I
wonder whether this could be problematic, because "disallowing syscall"
in rseq areas may mean the syscall won't take effect for some people, I
guess?

> this on PowerPC ?  I was under the impression that PowerPC return to
> userspace fast-path was not calling C code unless work flags were set,
> but I might be wrong.
> 

I think you're right. So we have to introduce a call site for rseq_syscall()
in the syscall path, something like:

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 51695608c68b..a25734a96640 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -222,6 +222,9 @@ system_call_exit:
 	mtmsrd	r11,1
 #endif /* CONFIG_PPC_BOOK3E */
 
+	addi    r3,r1,STACK_FRAME_OVERHEAD
+	bl	rseq_syscall
+
 	ld	r9,TI_FLAGS(r12)
 	li	r11,-MAX_ERRNO
 	andi.	r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)

But I think it's important for us to first decide where (before or after
the syscall) we do the detection.

Regards,
Boqun

> Thoughts ?
> 
> Thanks!
> 
> Mathieu
> 
> -- 
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
Peter Zijlstra May 17, 2018, 7:43 a.m. UTC | #4
On Thu, May 17, 2018 at 09:19:49AM +0800, Boqun Feng wrote:
> On Wed, May 16, 2018 at 04:13:16PM -0400, Mathieu Desnoyers wrote:

> > and that x86 calls it from syscall_return_slowpath() (which AFAIU is
> > now used in the fast-path since KPTI), I wonder where we should call
> 
> So we actually detect this after the syscall takes effect, right? I
> wonder whether this could be problematic, because "disallowing syscall"
> in rseq areas may means the syscall won't take effect to some people, I
> guess?

It doesn't really matter I suspect, the important part is the program
getting killed.

I agree that doing it on sysenter is slightly nicer, but I'll take
sysexit if that's what it takes.

> > this on PowerPC ?  I was under the impression that PowerPC return to
> > userspace fast-path was not calling C code unless work flags were set,
> > but I might be wrong.
> > 
> 
> I think you're right. So we have to introduce callsite to rseq_syscall()
> in syscall path, something like:
> 
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 51695608c68b..a25734a96640 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -222,6 +222,9 @@ system_call_exit:
>  	mtmsrd	r11,1
>  #endif /* CONFIG_PPC_BOOK3E */
>  
> +	addi    r3,r1,STACK_FRAME_OVERHEAD
> +	bl	rseq_syscall
> +
>  	ld	r9,TI_FLAGS(r12)
>  	li	r11,-MAX_ERRNO
>  	andi.	r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
> 
> But I think it's important for us to first decide where (before or after
> the syscall) we do the detection.

The important thing is the process getting very dead. Either sysenter
or sysexit gets that done.
Mathieu Desnoyers May 17, 2018, 3:28 p.m. UTC | #5
----- On May 16, 2018, at 9:19 PM, Boqun Feng boqun.feng@gmail.com wrote:

> On Wed, May 16, 2018 at 04:13:16PM -0400, Mathieu Desnoyers wrote:
>> ----- On May 16, 2018, at 12:18 PM, Peter Zijlstra peterz@infradead.org wrote:
>> 
>> > On Mon, Apr 30, 2018 at 06:44:26PM -0400, Mathieu Desnoyers wrote:
>> >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> >> index c32a181a7cbb..ed21a777e8c6 100644
>> >> --- a/arch/powerpc/Kconfig
>> >> +++ b/arch/powerpc/Kconfig
>> >> @@ -223,6 +223,7 @@ config PPC
>> >>  	select HAVE_SYSCALL_TRACEPOINTS
>> >>  	select HAVE_VIRT_CPU_ACCOUNTING
>> >>  	select HAVE_IRQ_TIME_ACCOUNTING
>> >> +	select HAVE_RSEQ
>> >>  	select IRQ_DOMAIN
>> >>  	select IRQ_FORCED_THREADING
>> >>  	select MODULES_USE_ELF_RELA
>> >> diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
>> >> index 61db86ecd318..d3bb3aaaf5ac 100644
>> >> --- a/arch/powerpc/kernel/signal.c
>> >> +++ b/arch/powerpc/kernel/signal.c
>> >> @@ -133,6 +133,8 @@ static void do_signal(struct task_struct *tsk)
>> >>  	/* Re-enable the breakpoints for the signal stack */
>> >>  	thread_change_pc(tsk, tsk->thread.regs);
>> >>  
>> >> +	rseq_signal_deliver(tsk->thread.regs);
>> >> +
>> >>  	if (is32) {
>> >>          	if (ksig.ka.sa.sa_flags & SA_SIGINFO)
>> >>  			ret = handle_rt_signal32(&ksig, oldset, tsk);
>> >> @@ -164,6 +166,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long
>> >> thread_info_flags)
>> >>  	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
>> >>  		clear_thread_flag(TIF_NOTIFY_RESUME);
>> >>  		tracehook_notify_resume(regs);
>> >> +		rseq_handle_notify_resume(regs);
>> >>  	}
>> >>  
>> >>  	user_enter();
>> > 
>> > Again no rseq_syscall().
>> 
>> Same question for PowerPC as for ARM:
>> 
>> Considering that rseq_syscall is implemented as follows:
>> 
>> +void rseq_syscall(struct pt_regs *regs)
>> +{
>> +       unsigned long ip = instruction_pointer(regs);
>> +       struct task_struct *t = current;
>> +       struct rseq_cs rseq_cs;
>> +
>> +       if (!t->rseq)
>> +               return;
>> +       if (!access_ok(VERIFY_READ, t->rseq, sizeof(*t->rseq)) ||
>> +           rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
>> +               force_sig(SIGSEGV, t);
>> +}
>> 
>> and that x86 calls it from syscall_return_slowpath() (which AFAIU is
>> now used in the fast-path since KPTI), I wonder where we should call
> 
> So we actually detect this after the syscall takes effect, right? I
> wonder whether this could be problematic, because "disallowing syscall"
> in rseq areas may means the syscall won't take effect to some people, I
> guess?
> 
>> this on PowerPC ?  I was under the impression that PowerPC return to
>> userspace fast-path was not calling C code unless work flags were set,
>> but I might be wrong.
>> 
> 
> I think you're right. So we have to introduce callsite to rseq_syscall()
> in syscall path, something like:
> 
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 51695608c68b..a25734a96640 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -222,6 +222,9 @@ system_call_exit:
> 	mtmsrd	r11,1
> #endif /* CONFIG_PPC_BOOK3E */
> 
> +	addi    r3,r1,STACK_FRAME_OVERHEAD
> +	bl	rseq_syscall
> +
> 	ld	r9,TI_FLAGS(r12)
> 	li	r11,-MAX_ERRNO
> 	andi.
> 		r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
> 
> But I think it's important for us to first decide where (before or after
> the syscall) we do the detection.

As Peter said, we don't really care whether it's on syscall entry or exit, as
long as the process gets killed when the erroneous use is detected. I think doing
it on syscall exit is a bit easier because we can clearly access the userspace
TLS, which AFAIU may be less straightforward on syscall entry.

We may want to add #ifdef CONFIG_DEBUG_RSEQ / #endif around the code you
proposed above, so it's only compiled in if CONFIG_DEBUG_RSEQ=y.

On the ARM leg of the email thread, Will Deacon suggests testing whether current->rseq
is non-NULL before calling rseq_syscall(). I wonder if this added check is justified
at the assembly level, considering that this is just a debugging option. We already do
that check at the very beginning of rseq_syscall().

Thoughts ?

Thanks,

Mathieu

> 
> Regards,
> Boqun
> 
>> Thoughts ?
>> 
>> Thanks!
>> 
>> Mathieu
>> 
>> --
>> Mathieu Desnoyers
>> EfficiOS Inc.
> > http://www.efficios.com
Boqun Feng May 17, 2018, 11:50 p.m. UTC | #6
On Thu, May 17, 2018, at 11:28 PM, Mathieu Desnoyers wrote:
> ----- On May 16, 2018, at 9:19 PM, Boqun Feng boqun.feng@gmail.com wrote:
> 
> > On Wed, May 16, 2018 at 04:13:16PM -0400, Mathieu Desnoyers wrote:
> >> ----- On May 16, 2018, at 12:18 PM, Peter Zijlstra peterz@infradead.org wrote:
> >> 
> >> > On Mon, Apr 30, 2018 at 06:44:26PM -0400, Mathieu Desnoyers wrote:
> >> >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> >> >> index c32a181a7cbb..ed21a777e8c6 100644
> >> >> --- a/arch/powerpc/Kconfig
> >> >> +++ b/arch/powerpc/Kconfig
> >> >> @@ -223,6 +223,7 @@ config PPC
> >> >>  	select HAVE_SYSCALL_TRACEPOINTS
> >> >>  	select HAVE_VIRT_CPU_ACCOUNTING
> >> >>  	select HAVE_IRQ_TIME_ACCOUNTING
> >> >> +	select HAVE_RSEQ
> >> >>  	select IRQ_DOMAIN
> >> >>  	select IRQ_FORCED_THREADING
> >> >>  	select MODULES_USE_ELF_RELA
> >> >> diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> >> >> index 61db86ecd318..d3bb3aaaf5ac 100644
> >> >> --- a/arch/powerpc/kernel/signal.c
> >> >> +++ b/arch/powerpc/kernel/signal.c
> >> >> @@ -133,6 +133,8 @@ static void do_signal(struct task_struct *tsk)
> >> >>  	/* Re-enable the breakpoints for the signal stack */
> >> >>  	thread_change_pc(tsk, tsk->thread.regs);
> >> >>  
> >> >> +	rseq_signal_deliver(tsk->thread.regs);
> >> >> +
> >> >>  	if (is32) {
> >> >>          	if (ksig.ka.sa.sa_flags & SA_SIGINFO)
> >> >>  			ret = handle_rt_signal32(&ksig, oldset, tsk);
> >> >> @@ -164,6 +166,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long
> >> >> thread_info_flags)
> >> >>  	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
> >> >>  		clear_thread_flag(TIF_NOTIFY_RESUME);
> >> >>  		tracehook_notify_resume(regs);
> >> >> +		rseq_handle_notify_resume(regs);
> >> >>  	}
> >> >>  
> >> >>  	user_enter();
> >> > 
> >> > Again no rseq_syscall().
> >> 
> >> Same question for PowerPC as for ARM:
> >> 
> >> Considering that rseq_syscall is implemented as follows:
> >> 
> >> +void rseq_syscall(struct pt_regs *regs)
> >> +{
> >> +       unsigned long ip = instruction_pointer(regs);
> >> +       struct task_struct *t = current;
> >> +       struct rseq_cs rseq_cs;
> >> +
> >> +       if (!t->rseq)
> >> +               return;
> >> +       if (!access_ok(VERIFY_READ, t->rseq, sizeof(*t->rseq)) ||
> >> +           rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
> >> +               force_sig(SIGSEGV, t);
> >> +}
> >> 
> >> and that x86 calls it from syscall_return_slowpath() (which AFAIU is
> >> now used in the fast-path since KPTI), I wonder where we should call
> > 
> > So we actually detect this after the syscall takes effect, right? I
> > wonder whether this could be problematic, because "disallowing syscall"
> > in rseq areas may means the syscall won't take effect to some people, I
> > guess?
> > 
> >> this on PowerPC ?  I was under the impression that PowerPC return to
> >> userspace fast-path was not calling C code unless work flags were set,
> >> but I might be wrong.
> >> 
> > 
> > I think you're right. So we have to introduce callsite to rseq_syscall()
> > in syscall path, something like:
> > 
> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> > index 51695608c68b..a25734a96640 100644
> > --- a/arch/powerpc/kernel/entry_64.S
> > +++ b/arch/powerpc/kernel/entry_64.S
> > @@ -222,6 +222,9 @@ system_call_exit:
> > 	mtmsrd	r11,1
> > #endif /* CONFIG_PPC_BOOK3E */
> > 
> > +	addi    r3,r1,STACK_FRAME_OVERHEAD
> > +	bl	rseq_syscall
> > +
> > 	ld	r9,TI_FLAGS(r12)
> > 	li	r11,-MAX_ERRNO
> > 	andi.
> > 		r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
> > 
> > But I think it's important for us to first decide where (before or after
> > the syscall) we do the detection.
> 
> As Peter said, we don't really care whether it's on syscall entry or 
> exit, as
> long as the process gets killed when the erroneous use is detected. I 
> think doing
> it on syscall exit is a bit easier because we can clearly access the 
> userspace
> TLS, which AFAIU may be less straightforward on syscall entry.
>

Fair enough.
 
> We may want to add #ifdef CONFIG_DEBUG_RSEQ / #endif around the code you
> proposed above, so it's only compiled in if CONFIG_DEBUG_RSEQ=y.
> 

OK.

> On the ARM leg of the email thread, Will Deacon suggests to test whether 
> current->rseq
> is non-NULL before calling rseq_syscall(). I wonder if this added check 
> is justified
> as the assembly level, considering that this is just a debugging option. 
> We already do
> that check at the very beginning of rseq_syscall().
> 

Yes, I think it's better to do the check in rseq_syscall(), leaving the asm
code a bit cleaner.

Regards,
Boqun

> Thoughts ?
> 
> Thanks,
> 
> Mathieu
> 
> > 
> > Regards,
> > Boqun
> > 
> >> Thoughts ?
> >> 
> >> Thanks!
> >> 
> >> Mathieu
> >> 
> >> --
> >> Mathieu Desnoyers
> >> EfficiOS Inc.
> > > http://www.efficios.com
> 
> -- 
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
Michael Ellerman May 18, 2018, 12:38 p.m. UTC | #7
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> writes:
> ----- On May 16, 2018, at 9:19 PM, Boqun Feng boqun.feng@gmail.com wrote:
>> On Wed, May 16, 2018 at 04:13:16PM -0400, Mathieu Desnoyers wrote:
>>> ----- On May 16, 2018, at 12:18 PM, Peter Zijlstra peterz@infradead.org wrote:
>>> > On Mon, Apr 30, 2018 at 06:44:26PM -0400, Mathieu Desnoyers wrote:
>>> >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>>> >> index c32a181a7cbb..ed21a777e8c6 100644
>>> >> --- a/arch/powerpc/Kconfig
>>> >> +++ b/arch/powerpc/Kconfig
>>> >> @@ -223,6 +223,7 @@ config PPC
>>> >>  	select HAVE_SYSCALL_TRACEPOINTS
>>> >>  	select HAVE_VIRT_CPU_ACCOUNTING
>>> >>  	select HAVE_IRQ_TIME_ACCOUNTING
>>> >> +	select HAVE_RSEQ
>>> >>  	select IRQ_DOMAIN
>>> >>  	select IRQ_FORCED_THREADING
>>> >>  	select MODULES_USE_ELF_RELA
>>> >> diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
>>> >> index 61db86ecd318..d3bb3aaaf5ac 100644
>>> >> --- a/arch/powerpc/kernel/signal.c
>>> >> +++ b/arch/powerpc/kernel/signal.c
>>> >> @@ -133,6 +133,8 @@ static void do_signal(struct task_struct *tsk)
>>> >>  	/* Re-enable the breakpoints for the signal stack */
>>> >>  	thread_change_pc(tsk, tsk->thread.regs);
>>> >>  
>>> >> +	rseq_signal_deliver(tsk->thread.regs);
>>> >> +
>>> >>  	if (is32) {
>>> >>          	if (ksig.ka.sa.sa_flags & SA_SIGINFO)
>>> >>  			ret = handle_rt_signal32(&ksig, oldset, tsk);
>>> >> @@ -164,6 +166,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long
>>> >> thread_info_flags)
>>> >>  	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
>>> >>  		clear_thread_flag(TIF_NOTIFY_RESUME);
>>> >>  		tracehook_notify_resume(regs);
>>> >> +		rseq_handle_notify_resume(regs);
>>> >>  	}
>>> >>  
>>> >>  	user_enter();
>>> > 
>>> > Again no rseq_syscall().
>>> 
>>> Same question for PowerPC as for ARM:
>>> 
>>> Considering that rseq_syscall is implemented as follows:
>>> 
>>> +void rseq_syscall(struct pt_regs *regs)
>>> +{
>>> +       unsigned long ip = instruction_pointer(regs);
>>> +       struct task_struct *t = current;
>>> +       struct rseq_cs rseq_cs;
>>> +
>>> +       if (!t->rseq)
>>> +               return;
>>> +       if (!access_ok(VERIFY_READ, t->rseq, sizeof(*t->rseq)) ||
>>> +           rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
>>> +               force_sig(SIGSEGV, t);
>>> +}
>>> 
>>> and that x86 calls it from syscall_return_slowpath() (which AFAIU is
>>> now used in the fast-path since KPTI), I wonder where we should call
>> 
>> So we actually detect this after the syscall takes effect, right? I
>> wonder whether this could be problematic, because "disallowing syscall"
>> in rseq areas may means the syscall won't take effect to some people, I
>> guess?
>> 
>>> this on PowerPC ?  I was under the impression that PowerPC return to
>>> userspace fast-path was not calling C code unless work flags were set,
>>> but I might be wrong.
>>> 
>> 
>> I think you're right. So we have to introduce callsite to rseq_syscall()
>> in syscall path, something like:
>> 
>> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
>> index 51695608c68b..a25734a96640 100644
>> --- a/arch/powerpc/kernel/entry_64.S
>> +++ b/arch/powerpc/kernel/entry_64.S
>> @@ -222,6 +222,9 @@ system_call_exit:
>> 	mtmsrd	r11,1
>> #endif /* CONFIG_PPC_BOOK3E */
>> 
>> +	addi    r3,r1,STACK_FRAME_OVERHEAD
>> +	bl	rseq_syscall
>> +
>> 	ld	r9,TI_FLAGS(r12)
>> 	li	r11,-MAX_ERRNO
>> 	andi.
>> 		r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
>> 
>> But I think it's important for us to first decide where (before or after
>> the syscall) we do the detection.
>
> As Peter said, we don't really care whether it's on syscall entry or exit, as
> long as the process gets killed when the erroneous use is detected. I think doing
> it on syscall exit is a bit easier because we can clearly access the userspace
> TLS, which AFAIU may be less straightforward on syscall entry.

Coming in to the thread late, sorry if I'm missing the point.

> We may want to add #ifdef CONFIG_DEBUG_RSEQ / #endif around the code you
> proposed above, so it's only compiled in if CONFIG_DEBUG_RSEQ=y.

That sounds good. A function call is not free even if it returns immediately.

> On the ARM leg of the email thread, Will Deacon suggests to test whether current->rseq
> is non-NULL before calling rseq_syscall(). I wonder if this added check is justified
> as the assembly level, considering that this is just a debugging option. We already do
> that check at the very beginning of rseq_syscall().

I guess it depends if this is one of those "debugging options" that's
going to end up turned on in distro kernels?

I think in that code we'd need to check paca->current->rseq, so that
wouldn't be free either.

cheers
Mathieu Desnoyers May 18, 2018, 6:17 p.m. UTC | #8
----- On May 17, 2018, at 7:50 PM, Boqun Feng boqun.feng@gmail.com wrote:
[...]
>> > I think you're right. So we have to introduce callsite to rseq_syscall()
>> > in syscall path, something like:
>> > 
>> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
>> > index 51695608c68b..a25734a96640 100644
>> > --- a/arch/powerpc/kernel/entry_64.S
>> > +++ b/arch/powerpc/kernel/entry_64.S
>> > @@ -222,6 +222,9 @@ system_call_exit:
>> > 	mtmsrd	r11,1
>> > #endif /* CONFIG_PPC_BOOK3E */
>> > 
>> > +	addi    r3,r1,STACK_FRAME_OVERHEAD
>> > +	bl	rseq_syscall
>> > +
>> > 	ld	r9,TI_FLAGS(r12)
>> > 	li	r11,-MAX_ERRNO
>> > 	andi.
>> > 		r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
>> > 

By the way, I think this is not the right spot to call rseq_syscall, because
interrupts are disabled. I think we should move this hunk right after system_call_exit.

Would you like to implement and test an updated patch adding those calls for ppc 32 and 64 ?

Thanks,

Mathieu
Boqun Feng May 20, 2018, 2:08 p.m. UTC | #9
On Fri, May 18, 2018 at 02:17:17PM -0400, Mathieu Desnoyers wrote:
> ----- On May 17, 2018, at 7:50 PM, Boqun Feng boqun.feng@gmail.com wrote:
> [...]
> >> > I think you're right. So we have to introduce callsite to rseq_syscall()
> >> > in syscall path, something like:
> >> > 
> >> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> >> > index 51695608c68b..a25734a96640 100644
> >> > --- a/arch/powerpc/kernel/entry_64.S
> >> > +++ b/arch/powerpc/kernel/entry_64.S
> >> > @@ -222,6 +222,9 @@ system_call_exit:
> >> > 	mtmsrd	r11,1
> >> > #endif /* CONFIG_PPC_BOOK3E */
> >> > 
> >> > +	addi    r3,r1,STACK_FRAME_OVERHEAD
> >> > +	bl	rseq_syscall
> >> > +
> >> > 	ld	r9,TI_FLAGS(r12)
> >> > 	li	r11,-MAX_ERRNO
> >> > 	andi.
> >> > 		r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
> >> > 
> 
> By the way, I think this is not the right spot to call rseq_syscall, because
> interrupts are disabled. I think we should move this hunk right after system_call_exit.
> 

Good point.

> Would you like to implement and test an updated patch adding those calls for ppc 32 and 64 ?
> 

I'd like to help, but I don't have a handy ppc environment for testing...
So I made the patch below, which has only been build-tested; I hope it
is somewhat helpful.

Regards,
Boqun

--------------------------------->8
Subject: [PATCH] powerpc: Add syscall detection for restartable sequences

Syscalls are not allowed inside restartable sequences, so add a call to
rseq_syscall() at the very beginning of the system call exit path for
CONFIG_DEBUG_RSEQ=y kernels. This helps us detect whether a syscall is
issued inside a restartable sequence.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
---
 arch/powerpc/kernel/entry_32.S | 5 +++++
 arch/powerpc/kernel/entry_64.S | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index eb8d01bae8c6..2f134eebe7ed 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -365,6 +365,11 @@ syscall_dotrace_cont:
 	blrl			/* Call handler */
 	.globl	ret_from_syscall
 ret_from_syscall:
+#ifdef CONFIG_DEBUG_RSEQ
+	/* Check whether the syscall is issued inside a restartable sequence */
+	addi    r3,r1,STACK_FRAME_OVERHEAD
+	bl      rseq_syscall
+#endif
 	mr	r6,r3
 	CURRENT_THREAD_INFO(r12, r1)
 	/* disable interrupts so current_thread_info()->flags can't change */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2cb5109a7ea3..2e2d59bb45d0 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -204,6 +204,11 @@ system_call:			/* label this so stack traces look sane */
  * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
  */
 system_call_exit:
+#ifdef CONFIG_DEBUG_RSEQ
+	/* Check whether the syscall is issued inside a restartable sequence */
+	addi    r3,r1,STACK_FRAME_OVERHEAD
+	bl      rseq_syscall
+#endif
 	/*
 	 * Disable interrupts so current_thread_info()->flags can't change,
 	 * and so that we don't get interrupted after loading SRR0/1.
Mathieu Desnoyers May 23, 2018, 8:14 p.m. UTC | #10
----- On May 20, 2018, at 10:08 AM, Boqun Feng boqun.feng@gmail.com wrote:

> On Fri, May 18, 2018 at 02:17:17PM -0400, Mathieu Desnoyers wrote:
>> ----- On May 17, 2018, at 7:50 PM, Boqun Feng boqun.feng@gmail.com wrote:
>> [...]
>> >> > I think you're right. So we have to introduce callsite to rseq_syscall()
>> >> > in syscall path, something like:
>> >> > 
>> >> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
>> >> > index 51695608c68b..a25734a96640 100644
>> >> > --- a/arch/powerpc/kernel/entry_64.S
>> >> > +++ b/arch/powerpc/kernel/entry_64.S
>> >> > @@ -222,6 +222,9 @@ system_call_exit:
>> >> > 	mtmsrd	r11,1
>> >> > #endif /* CONFIG_PPC_BOOK3E */
>> >> > 
>> >> > +	addi    r3,r1,STACK_FRAME_OVERHEAD
>> >> > +	bl	rseq_syscall
>> >> > +
>> >> > 	ld	r9,TI_FLAGS(r12)
>> >> > 	li	r11,-MAX_ERRNO
>> >> > 	andi.
>> >> > 		r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
>> >> > 
>> 
>> By the way, I think this is not the right spot to call rseq_syscall, because
>> interrupts are disabled. I think we should move this hunk right after
>> system_call_exit.
>> 
> 
> Good point.
> 
>> Would you like to implement and test an updated patch adding those calls for ppc
>> 32 and 64 ?
>> 
> 
> I'd like to help, but I don't have a handy ppc environment for test...
> So I made the below patch which has only been build-tested, hope it
> could be somewhat helpful.

Hi Boqun,

I tried your patch in a ppc64 le environment, and it does not survive boot
with CONFIG_DEBUG_RSEQ=y. init gets killed right away.

Moreover, I'm not sure that the r3 register doesn't contain something worth
saving before the call on ppc32. Right after the call there is a "mr"
instruction which, AFAIU, takes r3 as its input register.

Can you look into it ?

Thanks,

Mathieu

> 
> Regards,
> Boqun
> 
> --------------------------------->8
> Subject: [PATCH] powerpc: Add syscall detection for restartable sequences
> 
> Syscalls are not allowed inside restartable sequences, so add a call to
> rseq_syscall() at the very beginning of system call exiting path for
> CONFIG_DEBUG_RSEQ=y kernel. This could help us to detect whether there
> is a syscall issued inside restartable sequences.
> 
> Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> ---
> arch/powerpc/kernel/entry_32.S | 5 +++++
> arch/powerpc/kernel/entry_64.S | 5 +++++
> 2 files changed, 10 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
> index eb8d01bae8c6..2f134eebe7ed 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -365,6 +365,11 @@ syscall_dotrace_cont:
> 	blrl			/* Call handler */
> 	.globl	ret_from_syscall
> ret_from_syscall:
> +#ifdef CONFIG_DEBUG_RSEQ
> +	/* Check whether the syscall is issued inside a restartable sequence */
> +	addi    r3,r1,STACK_FRAME_OVERHEAD
> +	bl      rseq_syscall
> +#endif
> 	mr	r6,r3
> 	CURRENT_THREAD_INFO(r12, r1)
> 	/* disable interrupts so current_thread_info()->flags can't change */
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 2cb5109a7ea3..2e2d59bb45d0 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -204,6 +204,11 @@ system_call:			/* label this so stack traces look sane */
>  * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
>  */
> system_call_exit:
> +#ifdef CONFIG_DEBUG_RSEQ
> +	/* Check whether the syscall is issued inside a restartable sequence */
> +	addi    r3,r1,STACK_FRAME_OVERHEAD
> +	bl      rseq_syscall
> +#endif
> 	/*
> 	 * Disable interrupts so current_thread_info()->flags can't change,
> 	 * and so that we don't get interrupted after loading SRR0/1.
> --
> 2.16.2
Paul E. McKenney May 23, 2018, 8:46 p.m. UTC | #11
On Wed, May 23, 2018 at 04:14:39PM -0400, Mathieu Desnoyers wrote:
> ----- On May 20, 2018, at 10:08 AM, Boqun Feng boqun.feng@gmail.com wrote:
> 
> > On Fri, May 18, 2018 at 02:17:17PM -0400, Mathieu Desnoyers wrote:
> >> ----- On May 17, 2018, at 7:50 PM, Boqun Feng boqun.feng@gmail.com wrote:
> >> [...]
> >> >> > I think you're right. So we have to introduce callsite to rseq_syscall()
> >> >> > in syscall path, something like:
> >> >> > 
> >> >> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> >> >> > index 51695608c68b..a25734a96640 100644
> >> >> > --- a/arch/powerpc/kernel/entry_64.S
> >> >> > +++ b/arch/powerpc/kernel/entry_64.S
> >> >> > @@ -222,6 +222,9 @@ system_call_exit:
> >> >> > 	mtmsrd	r11,1
> >> >> > #endif /* CONFIG_PPC_BOOK3E */
> >> >> > 
> >> >> > +	addi    r3,r1,STACK_FRAME_OVERHEAD
> >> >> > +	bl	rseq_syscall
> >> >> > +
> >> >> > 	ld	r9,TI_FLAGS(r12)
> >> >> > 	li	r11,-MAX_ERRNO
> >> >> > 	andi.
> >> >> > 		r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
> >> >> > 
> >> 
> >> By the way, I think this is not the right spot to call rseq_syscall, because
> >> interrupts are disabled. I think we should move this hunk right after
> >> system_call_exit.
> >> 
> > 
> > Good point.
> > 
> >> Would you like to implement and test an updated patch adding those calls for ppc
> >> 32 and 64 ?
> >> 
> > 
> > I'd like to help, but I don't have a handy ppc environment for test...
> > So I made the below patch which has only been build-tested, hope it
> > could be somewhat helpful.
> 
> Hi Boqun,
> 
> I tried your patch in a ppc64 le environment, and it does not survive boot
> with CONFIG_DEBUG_RSEQ=y. init gets killed right away.
> 
> Moreover, I'm not sure that the r3 register don't contain something worth
> saving before the call on ppc32. Just after there is a "mr" instruction
> which AFAIU takes r3 as input register.
> 
> Can you look into it ?

Hello, Boqun,

You can also request access to a ppc64 environment here:

	http://osuosl.org/services/powerdev/request_hosting/

							Thanx, Paul

> Thanks,
> 
> Mathieu
> 
> > 
> > Regards,
> > Boqun
> > 
> > --------------------------------->8
> > Subject: [PATCH] powerpc: Add syscall detection for restartable sequences
> > 
> > Syscalls are not allowed inside restartable sequences, so add a call to
> > rseq_syscall() at the very beginning of system call exiting path for
> > CONFIG_DEBUG_RSEQ=y kernel. This could help us to detect whether there
> > is a syscall issued inside restartable sequences.
> > 
> > Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> > ---
> > arch/powerpc/kernel/entry_32.S | 5 +++++
> > arch/powerpc/kernel/entry_64.S | 5 +++++
> > 2 files changed, 10 insertions(+)
> > 
> > diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
> > index eb8d01bae8c6..2f134eebe7ed 100644
> > --- a/arch/powerpc/kernel/entry_32.S
> > +++ b/arch/powerpc/kernel/entry_32.S
> > @@ -365,6 +365,11 @@ syscall_dotrace_cont:
> > 	blrl			/* Call handler */
> > 	.globl	ret_from_syscall
> > ret_from_syscall:
> > +#ifdef CONFIG_DEBUG_RSEQ
> > +	/* Check whether the syscall is issued inside a restartable sequence */
> > +	addi    r3,r1,STACK_FRAME_OVERHEAD
> > +	bl      rseq_syscall
> > +#endif
> > 	mr	r6,r3
> > 	CURRENT_THREAD_INFO(r12, r1)
> > 	/* disable interrupts so current_thread_info()->flags can't change */
> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> > index 2cb5109a7ea3..2e2d59bb45d0 100644
> > --- a/arch/powerpc/kernel/entry_64.S
> > +++ b/arch/powerpc/kernel/entry_64.S
> > @@ -204,6 +204,11 @@ system_call:			/* label this so stack traces look sane */
> >  * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
> >  */
> > system_call_exit:
> > +#ifdef CONFIG_DEBUG_RSEQ
> > +	/* Check whether the syscall is issued inside a restartable sequence */
> > +	addi    r3,r1,STACK_FRAME_OVERHEAD
> > +	bl      rseq_syscall
> > +#endif
> > 	/*
> > 	 * Disable interrupts so current_thread_info()->flags can't change,
> > 	 * and so that we don't get interrupted after loading SRR0/1.
> > --
> > 2.16.2
> 
> -- 
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
>
Mathieu Desnoyers May 23, 2018, 9:29 p.m. UTC | #12
----- On May 23, 2018, at 4:14 PM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:

> ----- On May 20, 2018, at 10:08 AM, Boqun Feng boqun.feng@gmail.com wrote:
> 
>> On Fri, May 18, 2018 at 02:17:17PM -0400, Mathieu Desnoyers wrote:
>>> ----- On May 17, 2018, at 7:50 PM, Boqun Feng boqun.feng@gmail.com wrote:
>>> [...]
>>> >> > I think you're right. So we have to introduce callsite to rseq_syscall()
>>> >> > in syscall path, something like:
>>> >> > 
>>> >> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
>>> >> > index 51695608c68b..a25734a96640 100644
>>> >> > --- a/arch/powerpc/kernel/entry_64.S
>>> >> > +++ b/arch/powerpc/kernel/entry_64.S
>>> >> > @@ -222,6 +222,9 @@ system_call_exit:
>>> >> > 	mtmsrd	r11,1
>>> >> > #endif /* CONFIG_PPC_BOOK3E */
>>> >> > 
>>> >> > +	addi    r3,r1,STACK_FRAME_OVERHEAD
>>> >> > +	bl	rseq_syscall
>>> >> > +
>>> >> > 	ld	r9,TI_FLAGS(r12)
>>> >> > 	li	r11,-MAX_ERRNO
>>> >> > 	andi.
>>> >> > 		r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
>>> >> > 
>>> 
>>> By the way, I think this is not the right spot to call rseq_syscall, because
>>> interrupts are disabled. I think we should move this hunk right after
>>> system_call_exit.
>>> 
>> 
>> Good point.
>> 
>>> Would you like to implement and test an updated patch adding those calls for ppc
>>> 32 and 64 ?
>>> 
>> 
>> I'd like to help, but I don't have a handy ppc environment for test...
>> So I made the below patch which has only been build-tested, hope it
>> could be somewhat helpful.
> 
> Hi Boqun,
> 
> I tried your patch in a ppc64 le environment, and it does not survive boot
> with CONFIG_DEBUG_RSEQ=y. init gets killed right away.

The following fixup gets ppc64 to work:

--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -208,6 +208,7 @@ system_call_exit:
        /* Check whether the syscall is issued inside a restartable sequence */
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      rseq_syscall
+       ld      r3,RESULT(r1)
 #endif
        /*
         * Disable interrupts so current_thread_info()->flags can't change,

> Moreover, I'm not sure that the r3 register don't contain something worth
> saving before the call on ppc32. Just after there is a "mr" instruction
> which AFAIU takes r3 as input register.

I'll start testing on ppc32 now.

Thanks,

Mathieu

> 
> Can you look into it ?
> 
> Thanks,
> 
> Mathieu
> 
>> 
>> Regards,
>> Boqun
>> 
>> --------------------------------->8
>> Subject: [PATCH] powerpc: Add syscall detection for restartable sequences
>> 
>> Syscalls are not allowed inside restartable sequences, so add a call to
>> rseq_syscall() at the very beginning of system call exiting path for
>> CONFIG_DEBUG_RSEQ=y kernel. This could help us to detect whether there
>> is a syscall issued inside restartable sequences.
>> 
>> Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
>> ---
>> arch/powerpc/kernel/entry_32.S | 5 +++++
>> arch/powerpc/kernel/entry_64.S | 5 +++++
>> 2 files changed, 10 insertions(+)
>> 
>> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
>> index eb8d01bae8c6..2f134eebe7ed 100644
>> --- a/arch/powerpc/kernel/entry_32.S
>> +++ b/arch/powerpc/kernel/entry_32.S
>> @@ -365,6 +365,11 @@ syscall_dotrace_cont:
>> 	blrl			/* Call handler */
>> 	.globl	ret_from_syscall
>> ret_from_syscall:
>> +#ifdef CONFIG_DEBUG_RSEQ
>> +	/* Check whether the syscall is issued inside a restartable sequence */
>> +	addi    r3,r1,STACK_FRAME_OVERHEAD
>> +	bl      rseq_syscall
>> +#endif
>> 	mr	r6,r3
>> 	CURRENT_THREAD_INFO(r12, r1)
>> 	/* disable interrupts so current_thread_info()->flags can't change */
>> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
>> index 2cb5109a7ea3..2e2d59bb45d0 100644
>> --- a/arch/powerpc/kernel/entry_64.S
>> +++ b/arch/powerpc/kernel/entry_64.S
>> @@ -204,6 +204,11 @@ system_call:			/* label this so stack traces look sane */
>>  * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
>>  */
>> system_call_exit:
>> +#ifdef CONFIG_DEBUG_RSEQ
>> +	/* Check whether the syscall is issued inside a restartable sequence */
>> +	addi    r3,r1,STACK_FRAME_OVERHEAD
>> +	bl      rseq_syscall
>> +#endif
>> 	/*
>> 	 * Disable interrupts so current_thread_info()->flags can't change,
>> 	 * and so that we don't get interrupted after loading SRR0/1.
>> --
>> 2.16.2
> 
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
Michael Ellerman May 24, 2018, 1:03 a.m. UTC | #13
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> writes:
> ----- On May 23, 2018, at 4:14 PM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:
...
>> 
>> Hi Boqun,
>> 
>> I tried your patch in a ppc64 le environment, and it does not survive boot
>> with CONFIG_DEBUG_RSEQ=y. init gets killed right away.


Sorry this code is super gross and hard to deal with.

> The following fixup gets ppc64 to work:
>
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -208,6 +208,7 @@ system_call_exit:
>         /* Check whether the syscall is issued inside a restartable sequence */
>         addi    r3,r1,STACK_FRAME_OVERHEAD
>         bl      rseq_syscall
> +       ld      r3,RESULT(r1)
>  #endif
>         /*
>          * Disable interrupts so current_thread_info()->flags can't change,

I don't think that's safe.

If you look above that, we have r3, r8 and r12 all live:

.Lsyscall_exit:
	std	r3,RESULT(r1)
	CURRENT_THREAD_INFO(r12, r1)

	ld	r8,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S
	/* No MSR:RI on BookE */
	andi.	r10,r8,MSR_RI
	beq-	.Lunrecov_restore
#endif


They're all volatile across function calls:

  http://openpowerfoundation.org/wp-content/uploads/resources/leabi/content/dbdoclet.50655240_68174.html


The system_call_exit symbol is actually there for kprobes and cosmetic
purposes. The actual syscall return flow starts at .Lsyscall_exit.

So I think this would work:

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index db4df061c33a..e19f377a25e0 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -184,6 +184,14 @@ system_call:			/* label this so stack traces look sane */
 
 .Lsyscall_exit:
 	std	r3,RESULT(r1)
+
+#ifdef CONFIG_DEBUG_RSEQ
+	/* Check whether the syscall is issued inside a restartable sequence */
+	addi    r3,r1,STACK_FRAME_OVERHEAD
+	bl      rseq_syscall
+	ld	r3,RESULT(r1)
+#endif
+
 	CURRENT_THREAD_INFO(r12, r1)
 
 	ld	r8,_MSR(r1)


I'll try and get this series into my test setup at some point, been a
bit busy lately :)

cheers
Mathieu Desnoyers May 28, 2018, 7 a.m. UTC | #14
----- On May 24, 2018, at 3:03 AM, Michael Ellerman mpe@ellerman.id.au wrote:

> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> writes:
>> ----- On May 23, 2018, at 4:14 PM, Mathieu Desnoyers
>> mathieu.desnoyers@efficios.com wrote:
> ...
>>> 
>>> Hi Boqun,
>>> 
>>> I tried your patch in a ppc64 le environment, and it does not survive boot
>>> with CONFIG_DEBUG_RSEQ=y. init gets killed right away.
> 
> 
> Sorry this code is super gross and hard to deal with.
> 
>> The following fixup gets ppc64 to work:
>>
>> --- a/arch/powerpc/kernel/entry_64.S
>> +++ b/arch/powerpc/kernel/entry_64.S
>> @@ -208,6 +208,7 @@ system_call_exit:
>>         /* Check whether the syscall is issued inside a restartable sequence */
>>         addi    r3,r1,STACK_FRAME_OVERHEAD
>>         bl      rseq_syscall
>> +       ld      r3,RESULT(r1)
>>  #endif
>>         /*
>>          * Disable interrupts so current_thread_info()->flags can't change,
> 
> I don't think that's safe.
> 
> If you look above that, we have r3, r8 and r12 all live:
> 
> .Lsyscall_exit:
>	std	r3,RESULT(r1)
>	CURRENT_THREAD_INFO(r12, r1)
> 
>	ld	r8,_MSR(r1)
> #ifdef CONFIG_PPC_BOOK3S
>	/* No MSR:RI on BookE */
>	andi.	r10,r8,MSR_RI
>	beq-	.Lunrecov_restore
> #endif
> 
> 
> They're all volatile across function calls:
> 
>  http://openpowerfoundation.org/wp-content/uploads/resources/leabi/content/dbdoclet.50655240_68174.html
> 
> 
> The system_call_exit symbol is actually there for kprobes and cosmetic
> purposes. The actual syscall return flow starts at .Lsyscall_exit.
> 
> So I think this would work:
> 
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index db4df061c33a..e19f377a25e0 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -184,6 +184,14 @@ system_call:			/* label this so stack traces look sane */
> 
> .Lsyscall_exit:
> 	std	r3,RESULT(r1)
> +
> +#ifdef CONFIG_DEBUG_RSEQ
> +	/* Check whether the syscall is issued inside a restartable sequence */
> +	addi    r3,r1,STACK_FRAME_OVERHEAD
> +	bl      rseq_syscall
> +	ld	r3,RESULT(r1)
> +#endif
> +
> 	CURRENT_THREAD_INFO(r12, r1)
> 
> 	ld	r8,_MSR(r1)
> 
> 
> I'll try and get this series into my test setup at some point, been a
> bit busy lately :)

Yes, this was needed. I had this in my tree already, but there is still
a kernel OOPS when running the rseq selftests on ppc64 with CONFIG_DEBUG_RSEQ=y.

My current dev tree is at: https://github.com/compudj/linux-percpu-dev/tree/rseq/dev-local

So considering we are at rc7 now, should I plan on removing the powerpc bits
for the merge window submission, or is someone planning to spend time on
fixing and testing the ppc integration before the merge window opens?

Thanks,

Mathieu


> 
> cheers
diff mbox series

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c32a181a7cbb..ed21a777e8c6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -223,6 +223,7 @@  config PPC
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select HAVE_RSEQ
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 61db86ecd318..d3bb3aaaf5ac 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -133,6 +133,8 @@  static void do_signal(struct task_struct *tsk)
 	/* Re-enable the breakpoints for the signal stack */
 	thread_change_pc(tsk, tsk->thread.regs);
 
+	rseq_signal_deliver(tsk->thread.regs);
+
 	if (is32) {
         	if (ksig.ka.sa.sa_flags & SA_SIGINFO)
 			ret = handle_rt_signal32(&ksig, oldset, tsk);
@@ -164,6 +166,7 @@  void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
+		rseq_handle_notify_resume(regs);
 	}
 
 	user_enter();