From patchwork Fri May 20 14:05:13 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Josh Poimboeuf X-Patchwork-Id: 624498 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3rB8qb4GpBz9sds for ; Sat, 21 May 2016 00:06:19 +1000 (AEST) Received: from ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3]) by lists.ozlabs.org (Postfix) with ESMTP id 3rB8qb3SRjzDqT2 for ; Sat, 21 May 2016 00:06:19 +1000 (AEST) X-Original-To: linuxppc-dev@lists.ozlabs.org Delivered-To: linuxppc-dev@lists.ozlabs.org Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 3rB8pR1VmjzDqCg for ; Sat, 21 May 2016 00:05:19 +1000 (AEST) Received: from int-mx10.intmail.prod.int.phx2.redhat.com (int-mx10.intmail.prod.int.phx2.redhat.com [10.5.11.23]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 25F976436D; Fri, 20 May 2016 14:05:17 +0000 (UTC) Received: from treble (ovpn-116-148.phx2.redhat.com [10.3.116.148]) by int-mx10.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with SMTP id u4KE5D99021803; Fri, 20 May 2016 10:05:14 -0400 Date: Fri, 20 May 2016 09:05:13 -0500 From: Josh Poimboeuf To: Andy Lutomirski Subject: Re: [RFC PATCH v2 05/18] sched: add task flag for preempt IRQ tracking Message-ID: <20160520140513.dsgfb5sei52qge3w@treble> References: <20160429202701.yijrohqdsurdxv2a@treble> <20160429212546.t26mvthtvh7543ff@treble> <20160429224112.kl3jlk7ccvfceg2r@treble> <20160502135243.jkbnonaesv7zfios@treble> <20160519231546.yvtqz5wacxvykvn2@treble> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: User-Agent: Mutt/1.6.0.1 (2016-04-01) X-Scanned-By: MIMEDefang 2.68 on 10.5.11.23 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.39]); Fri, 20 May 2016 14:05:17 +0000 (UTC) X-BeenThere: linuxppc-dev@lists.ozlabs.org X-Mailman-Version: 2.1.22 Precedence: list List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: "linux-s390@vger.kernel.org" , Jiri Kosina , Jessica Yu , Vojtech Pavlik , Petr Mladek , Peter Zijlstra , X86 ML , Heiko Carstens , "linux-kernel@vger.kernel.org" , Ingo Molnar , live-patching@vger.kernel.org, Jiri Slaby , Miroslav Benes , linuxppc-dev@lists.ozlabs.org, Chris J Arges Errors-To: linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org Sender: "Linuxppc-dev" On Thu, May 19, 2016 at 04:39:51PM -0700, Andy Lutomirski wrote: > On Thu, May 19, 2016 at 4:15 PM, Josh Poimboeuf wrote: > > Note this example is with today's unwinder. It could be made smarter to > > get the RIP from the pt_regs so the '?' could be removed from > > copy_page_to_iter(). > > > > Thoughts? > > I think we should do that. The silly sample patch I sent you (or at > least that I think I sent you) did that, and it worked nicely. Yeah, we can certainly do something similar to make the unwinder smarter. It should be very simple with this approach: if it finds the pt_regs() function on the stack, the (struct pt_regs *) pointer will be right after it. > > diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h > > index 9a9e588..f54886a 100644 > > --- a/arch/x86/entry/calling.h > > +++ b/arch/x86/entry/calling.h > > @@ -201,6 +201,32 @@ For 32-bit we have the following conventions - kernel is built with > > .byte 0xf1 > > .endm > > > > + /* > > + * Create a stack frame for the saved pt_regs. This allows frame > > + * pointer based unwinders to find pt_regs on the stack. > > + */ > > + .macro CREATE_PT_REGS_FRAME regs=%rsp > > +#ifdef CONFIG_FRAME_POINTER > > + pushq \regs > > + pushq $pt_regs+1 > > + pushq %rbp > > + movq %rsp, %rbp > > +#endif > > + .endm > > I don't love this part. It's going to hurt performance, and, given > that we need to change the unwinder anyway to make it useful, let's > just emit a table somewhere in .rodata and use it directly. I'm not sure about the idea of a table. I get the feeling it would add more complexity to both the entry code and the unwinder. How would you specify the pt_regs location when it's on a different stack? (See the interrupt macro: non-nested interrupts will place pt_regs on the task stack before switching to the irq stack.) If you're worried about performance, I can remove the syscall annotations. They're optional anyway, since the pt_regs is always at the same place on the stack for syscalls. I think three extra pushes wouldn't be a performance issue for interrupts/exceptions. And they'll go away when we finally bury CONFIG_FRAME_POINTER. Here's the same patch without syscall annotations: diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 9a9e588..f54886a 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -201,6 +201,32 @@ For 32-bit we have the following conventions - kernel is built with .byte 0xf1 .endm + /* + * Create a stack frame for the saved pt_regs. This allows frame + * pointer based unwinders to find pt_regs on the stack. + */ + .macro CREATE_PT_REGS_FRAME regs=%rsp +#ifdef CONFIG_FRAME_POINTER + pushq \regs + pushq $pt_regs+1 + pushq %rbp + movq %rsp, %rbp +#endif + .endm + + .macro REMOVE_PT_REGS_FRAME +#ifdef CONFIG_FRAME_POINTER + popq %rbp + addq $0x10, %rsp +#endif + .endm + + .macro CALL_HANDLER handler regs=%rsp + CREATE_PT_REGS_FRAME \regs + call \handler + REMOVE_PT_REGS_FRAME + .endm + #endif /* CONFIG_X86_64 */ /* diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9ee0da1..93bc8f0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -468,7 +468,7 @@ END(irq_entries_start) /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF - call \func /* rdi points to pt_regs */ + CALL_HANDLER \func regs=%rdi .endm /* @@ -495,7 +495,7 @@ ret_from_intr: /* Interrupt came from user space */ GLOBAL(retint_user) mov %rsp,%rdi - call prepare_exit_to_usermode + CALL_HANDLER prepare_exit_to_usermode TRACE_IRQS_IRETQ SWAPGS jmp restore_regs_and_iret @@ -509,7 +509,7 @@ retint_kernel: jnc 1f 0: cmpl $0, PER_CPU_VAR(__preempt_count) jnz 1f - call preempt_schedule_irq + CALL_HANDLER preempt_schedule_irq jmp 0b 1: #endif @@ -688,8 +688,6 @@ ENTRY(\sym) .endif .endif - movq %rsp, %rdi /* pt_regs pointer */ - .if \has_error_code movq ORIG_RAX(%rsp), %rsi /* get error code */ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ @@ -701,7 +699,8 @@ ENTRY(\sym) subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) .endif - call \do_sym + movq %rsp, %rdi /* pt_regs pointer */ + CALL_HANDLER \do_sym .if \shift_ist != -1 addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) @@ -728,8 +727,6 @@ ENTRY(\sym) call sync_regs movq %rax, %rsp /* switch stack */ - movq %rsp, %rdi /* pt_regs pointer */ - .if \has_error_code movq ORIG_RAX(%rsp), %rsi /* get error code */ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ @@ -737,7 +734,8 @@ ENTRY(\sym) xorl %esi, %esi /* no error code */ .endif - call \do_sym + movq %rsp, %rdi /* pt_regs pointer */ + CALL_HANDLER \do_sym jmp error_exit /* %ebx: no swapgs flag */ .endif @@ -1174,7 +1172,7 @@ ENTRY(nmi) movq %rsp, %rdi movq $-1, %rsi - call do_nmi + CALL_HANDLER do_nmi /* * Return back to user mode. We must *not* do the normal exit @@ -1387,7 +1385,7 @@ end_repeat_nmi: /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp, %rdi movq $-1, %rsi - call do_nmi + CALL_HANDLER do_nmi testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore @@ -1423,3 +1421,11 @@ ENTRY(ignore_sysret) mov $-ENOSYS, %eax sysret END(ignore_sysret) + +/* fake function which allows stack unwinders to detect pt_regs frames */ +#ifdef CONFIG_FRAME_POINTER +ENTRY(pt_regs) + nop + nop +ENDPROC(pt_regs) +#endif /* CONFIG_FRAME_POINTER */