Message ID | 1400619378-7262-5-git-send-email-rth@twiddle.net |
---|---|
State | New |
Headers | show |
On 20 May 2014 21:56, Richard Henderson <rth@twiddle.net> wrote: > From: Richard Henderson <rth@redhat.com> > > Use a constant frame size, rather than pushing/popping for every saved > register. Use stp, ldp, cbz. Share code with the _nocancel path. > > * sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h (PSEUDO): > Use ENTRY for _nocancel entry point. Reuse pieces of _nocancel > entry point for implementing the cancel path. Simplify cancel > path frame setup. Use cbz instead of cmp+bne for singlethread path. > (DOCARGS_2, UNDOCARGS_2): Use stp/ldp. > (DOCARGS_4, UNDOCARGS_4, DOCARGS_6, UNDOCARGS_6): Likewise. > (SINGLE_THREAD_P) [ASM]: Take a register number in which to > return the result. > --- > .../unix/sysv/linux/aarch64/nptl/sysdep-cancel.h | 186 +++++++-------------- > 1 file changed, 64 insertions(+), 122 deletions(-) > > diff --git a/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h b/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h > index e3b4b56..5cf3fd5 100644 > --- a/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h > +++ b/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h > @@ -26,121 +26,66 @@ > > # undef PSEUDO > # define PSEUDO(name, syscall_name, args) \ > - .section ".text"; \ > - .type __##syscall_name##_nocancel,%function; \ > - .globl __##syscall_name##_nocancel; \ > - __##syscall_name##_nocancel: \ > - cfi_startproc; \ > - DO_CALL (syscall_name, args); \ > - cmn x0, 4095; \ > - b.cs .Lsyscall_error; \ > - PSEUDO_RET; \ > - cfi_endproc; \ > - .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \ > - ENTRY (name); \ > - SINGLE_THREAD_P; \ > - bne .Lpseudo_cancel; \ > - DO_CALL (syscall_name, 0); \ > - cmn x0, 4095; \ > - b.cs .Lsyscall_error; \ > - PSEUDO_RET; \ > - .Lpseudo_cancel: \ > - DOCARGS_##args; /* save syscall args etc. around CENABLE. */ \ > - CENABLE; \ > - mov x16, x0; /* put mask in safe place. */ \ > - UNDOCARGS_##args; /* restore syscall args. 
*/ \ > - mov x8, SYS_ify (syscall_name); /* do the call. */ \ > - svc 0; \ > - str x0, [sp, -16]!; /* save syscall return value. */ \ > - cfi_adjust_cfa_offset (16); \ > - mov x0, x16; /* get mask back. */ \ > - CDISABLE; \ > - ldr x0, [sp], 16; \ > - cfi_adjust_cfa_offset (-16); \ > - ldr x30, [sp], 16; \ > - cfi_adjust_cfa_offset (-16); \ > - cfi_restore (x30); \ > - UNDOARGS_##args; \ > - cmn x0, 4095; \ > - b.cs .Lsyscall_error; > - > -# define DOCARGS_0 \ > - str x30, [sp, -16]!; \ > - cfi_adjust_cfa_offset (16); \ > - cfi_rel_offset (x30, 0) > - > + .section ".text"; \ > +ENTRY (__##syscall_name##_nocancel); \ > +.Lpseudo_nocancel: \ > + DO_CALL (syscall_name, args); \ > +.Lpseudo_ret: \ > + cmn x0, 4095; \ > + b.cs .Lsyscall_error; \ > + .subsection 2; \ > + .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \ > +ENTRY (name); \ > + SINGLE_THREAD_P(16); \ > + cbz w16, .Lpseudo_nocancel; \ > + /* Setup common stack frame no matter the number of args. */ \ > + stp x19, x30, [sp, -64]!; \ > + cfi_adjust_cfa_offset (64); \ > + cfi_rel_offset (x19, 0); \ > + cfi_rel_offset (x30, 8); \ > + DOCARGS_##args; /* save syscall args around CENABLE. */ \ > + CENABLE; \ > + mov x19, x0; /* save mask around syscall. */ \ > + UNDOCARGS_##args; /* restore syscall args. */ \ > + DO_CALL (syscall_name, args); \ > + str x0, [sp, 16]; /* save syscall return value. */ \ > + mov x0, x19; /* pass mask to CDISABLE. 
*/ \ > + CDISABLE; \ > + ldr x0, [sp, 16]; \ > + ldp x19, x30, [sp], 64; \ > + cfi_adjust_cfa_offset (-64); \ > + cfi_restore (x19); \ > + cfi_restore (x30); \ > + b .Lpseudo_ret; \ > + cfi_endproc; \ > + .size name, .-name; \ > + .previous > + > +# undef PSEUDO_END > +# define PSEUDO_END(name) \ > + SYSCALL_ERROR_HANDLER; \ > + cfi_endproc > + > +# define DOCARGS_0 > # define UNDOCARGS_0 > > -# define DOCARGS_1 \ > - DOCARGS_0; \ > - str x0, [sp, -16]!; \ > - cfi_adjust_cfa_offset (16); \ > - cfi_rel_offset (x0, 0) > - > -# define UNDOCARGS_1 \ > - ldr x0, [sp], 16; \ > - cfi_restore (x0); \ > - cfi_adjust_cfa_offset (-16); \ > - > -# define DOCARGS_2 \ > - DOCARGS_1; \ > - str x1, [sp, -16]!; \ > - cfi_adjust_cfa_offset (16); \ > - cfi_rel_offset (x1, 0) > - > -# define UNDOCARGS_2 \ > - ldr x1, [sp], 16; \ > - cfi_restore (x1); \ > - cfi_adjust_cfa_offset (-16); \ > - UNDOCARGS_1 > - > -# define DOCARGS_3 \ > - DOCARGS_2; \ > - str x2, [sp, -16]!; \ > - cfi_adjust_cfa_offset (16); \ > - cfi_rel_offset (x2, 0) > - > -# define UNDOCARGS_3 \ > - ldr x2, [sp], 16; \ > - cfi_restore (x2); \ > - cfi_adjust_cfa_offset (-16); \ > - UNDOCARGS_2 > - > -# define DOCARGS_4 \ > - DOCARGS_3; \ > - str x3, [sp, -16]!; \ > - cfi_adjust_cfa_offset (16); \ > - cfi_rel_offset (x3, 0) > - > -# define UNDOCARGS_4 \ > - ldr x3, [sp], 16; \ > - cfi_restore (x3); \ > - cfi_adjust_cfa_offset (-16); \ > - UNDOCARGS_3 > - > -# define DOCARGS_5 \ > - DOCARGS_4; \ > - str x4, [sp, -16]!; \ > - cfi_adjust_cfa_offset (16); \ > - cfi_rel_offset (x4, 0) > - > -# define UNDOCARGS_5 \ > - ldr x4, [sp], 16; \ > - cfi_restore (x4); \ > - cfi_adjust_cfa_offset (-16); \ > - UNDOCARGS_4 > - > -# define DOCARGS_6 \ > - DOCARGS_5; \ > - str x5, [sp, -16]!; \ > - cfi_adjust_cfa_offset (16); \ > - cfi_rel_offset (x5, 0) > - > -# define UNDOCARGS_6 \ > - ldr x5, [sp], 16; \ > - cfi_restore (x5); \ > - cfi_adjust_cfa_offset (-16); \ > - UNDOCARGS_5 > +# define DOCARGS_1 str x0, [sp, 16] > +# define 
UNDOCARGS_1 ldr x0, [sp, 16] > + > +# define DOCARGS_2 stp x0, x1, [sp, 16] > +# define UNDOCARGS_2 ldp x0, x1, [sp, 16] > + > +# define DOCARGS_3 DOCARGS_2; str x2, [sp, 32] > +# define UNDOCARGS_3 UNDOCARGS_2; ldr x2, [sp, 32] > + > +# define DOCARGS_4 DOCARGS_2; stp x2, x3, [sp, 32] > +# define UNDOCARGS_4 UNDOCARGS_2; ldp x2, x3, [sp, 32] > + > +# define DOCARGS_5 DOCARGS_4; str x4, [sp, 48] > +# define UNDOCARGS_5 UNDOCARGS_4; ldr x4, [sp, 48] > + > +# define DOCARGS_6 DOCARGS_4; stp x4, x5, [sp, 48] > +# define UNDOCARGS_6 UNDOCARGS_4; ldp x4, x5, [sp, 48] > > # ifdef IS_IN_libpthread > # define CENABLE bl __pthread_enable_asynccancel > @@ -162,10 +107,9 @@ > extern int __local_multiple_threads attribute_hidden; > # define SINGLE_THREAD_P __builtin_expect (__local_multiple_threads == 0, 1) > # else > -# define SINGLE_THREAD_P \ > - adrp x16, __local_multiple_threads; \ > - ldr w16, [x16, #:lo12:__local_multiple_threads]; \ > - cmp w16, 0; > +# define SINGLE_THREAD_P(R) \ > + adrp x##R, __local_multiple_threads; \ > + ldr w##R, [x##R, #:lo12:__local_multiple_threads] > # endif > # else > /* There is no __local_multiple_threads for librt, so use the TCB. */ > @@ -174,20 +118,18 @@ extern int __local_multiple_threads attribute_hidden; > __builtin_expect (THREAD_GETMEM (THREAD_SELF, \ > header.multiple_threads) == 0, 1) > # else > -# define SINGLE_THREAD_P \ > +# define SINGLE_THREAD_P(R) \ > stp x0, x30, [sp, -16]!; \ > cfi_adjust_cfa_offset (16); \ > cfi_rel_offset (x0, 0); \ > cfi_rel_offset (x30, 8); \ > bl __read_tp; \ > sub x0, x0, PTHREAD_SIZEOF; \ > - ldr w16, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET]; \ > + ldr w##R, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET]; \ > ldp x0, x30, [sp], 16; \ > cfi_restore (x0); \ > cfi_restore (x30); \ > - cfi_adjust_cfa_offset (-16); \ > - cmp w16, 0 > -# define SINGLE_THREAD_P_PIC(x) SINGLE_THREAD_P This macro gets removed which I think probably deserves a mention in the ChangeLog. 
It looks like it can be removed from the ARM port too...
On 05/20/2014 01:56 PM, Richard Henderson wrote: > - mov x16, x0; /* put mask in safe place. */ \ > - UNDOCARGS_##args; /* restore syscall args. */ \ > - mov x8, SYS_ify (syscall_name); /* do the call. */ \ > - svc 0; \ > - str x0, [sp, -16]!; /* save syscall return value. */ \ > - cfi_adjust_cfa_offset (16); \ > - mov x0, x16; /* get mask back. */ \ Oh, I should have mentioned that there's either a bug or an inconsistency here. We're saving a value in x16 around the syscall, except that on the C side, for the inline syscalls, we mark x16 as clobbered. My patch assumes "normal-ish" calling conventions for the kernel and puts the value in the normal call-saved register x19. If there is in fact a set of calling-convention call-clobbered registers that are not clobbered by syscalls, then we should consider adjusting the inline syscalls to match. r~
On 21 May 2014 16:10, Richard Henderson <rth@twiddle.net> wrote: > On 05/20/2014 01:56 PM, Richard Henderson wrote: >> - mov x16, x0; /* put mask in safe place. */ \ >> - UNDOCARGS_##args; /* restore syscall args. */ \ >> - mov x8, SYS_ify (syscall_name); /* do the call. */ \ >> - svc 0; \ >> - str x0, [sp, -16]!; /* save syscall return value. */ \ >> - cfi_adjust_cfa_offset (16); \ >> - mov x0, x16; /* get mask back. */ \ > > Oh, I should have mentioned, that there's either a bug or inconsistency here. > > We're saving a value in x16 around the syscall. Except that on the C side, for > the inline syscalls, we mark x16 as clobbered. > > My patch assumes that we want to assume "normal-ish" calling conventions for > the kernel and puts the value in the normal call saved register x19. > > If there are in fact a set of calling-convention call-clobbered registers that > are not clobbered by syscalls, then we should consider adjusting the inline > syscalls to match. When this was discussed previously, it turned out that everything apart from the return value is preserved across a syscall: https://sourceware.org/ml/libc-alpha/2014-03/msg00552.html I guess it would be better to avoid clobbering so many registers in the inline case.
diff --git a/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h b/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h index e3b4b56..5cf3fd5 100644 --- a/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h +++ b/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h @@ -26,121 +26,66 @@ # undef PSEUDO # define PSEUDO(name, syscall_name, args) \ - .section ".text"; \ - .type __##syscall_name##_nocancel,%function; \ - .globl __##syscall_name##_nocancel; \ - __##syscall_name##_nocancel: \ - cfi_startproc; \ - DO_CALL (syscall_name, args); \ - cmn x0, 4095; \ - b.cs .Lsyscall_error; \ - PSEUDO_RET; \ - cfi_endproc; \ - .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \ - ENTRY (name); \ - SINGLE_THREAD_P; \ - bne .Lpseudo_cancel; \ - DO_CALL (syscall_name, 0); \ - cmn x0, 4095; \ - b.cs .Lsyscall_error; \ - PSEUDO_RET; \ - .Lpseudo_cancel: \ - DOCARGS_##args; /* save syscall args etc. around CENABLE. */ \ - CENABLE; \ - mov x16, x0; /* put mask in safe place. */ \ - UNDOCARGS_##args; /* restore syscall args. */ \ - mov x8, SYS_ify (syscall_name); /* do the call. */ \ - svc 0; \ - str x0, [sp, -16]!; /* save syscall return value. */ \ - cfi_adjust_cfa_offset (16); \ - mov x0, x16; /* get mask back. */ \ - CDISABLE; \ - ldr x0, [sp], 16; \ - cfi_adjust_cfa_offset (-16); \ - ldr x30, [sp], 16; \ - cfi_adjust_cfa_offset (-16); \ - cfi_restore (x30); \ - UNDOARGS_##args; \ - cmn x0, 4095; \ - b.cs .Lsyscall_error; - -# define DOCARGS_0 \ - str x30, [sp, -16]!; \ - cfi_adjust_cfa_offset (16); \ - cfi_rel_offset (x30, 0) - + .section ".text"; \ +ENTRY (__##syscall_name##_nocancel); \ +.Lpseudo_nocancel: \ + DO_CALL (syscall_name, args); \ +.Lpseudo_ret: \ + cmn x0, 4095; \ + b.cs .Lsyscall_error; \ + .subsection 2; \ + .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \ +ENTRY (name); \ + SINGLE_THREAD_P(16); \ + cbz w16, .Lpseudo_nocancel; \ + /* Setup common stack frame no matter the number of args. 
*/ \ + stp x19, x30, [sp, -64]!; \ + cfi_adjust_cfa_offset (64); \ + cfi_rel_offset (x19, 0); \ + cfi_rel_offset (x30, 8); \ + DOCARGS_##args; /* save syscall args around CENABLE. */ \ + CENABLE; \ + mov x19, x0; /* save mask around syscall. */ \ + UNDOCARGS_##args; /* restore syscall args. */ \ + DO_CALL (syscall_name, args); \ + str x0, [sp, 16]; /* save syscall return value. */ \ + mov x0, x19; /* pass mask to CDISABLE. */ \ + CDISABLE; \ + ldr x0, [sp, 16]; \ + ldp x19, x30, [sp], 64; \ + cfi_adjust_cfa_offset (-64); \ + cfi_restore (x19); \ + cfi_restore (x30); \ + b .Lpseudo_ret; \ + cfi_endproc; \ + .size name, .-name; \ + .previous + +# undef PSEUDO_END +# define PSEUDO_END(name) \ + SYSCALL_ERROR_HANDLER; \ + cfi_endproc + +# define DOCARGS_0 # define UNDOCARGS_0 -# define DOCARGS_1 \ - DOCARGS_0; \ - str x0, [sp, -16]!; \ - cfi_adjust_cfa_offset (16); \ - cfi_rel_offset (x0, 0) - -# define UNDOCARGS_1 \ - ldr x0, [sp], 16; \ - cfi_restore (x0); \ - cfi_adjust_cfa_offset (-16); \ - -# define DOCARGS_2 \ - DOCARGS_1; \ - str x1, [sp, -16]!; \ - cfi_adjust_cfa_offset (16); \ - cfi_rel_offset (x1, 0) - -# define UNDOCARGS_2 \ - ldr x1, [sp], 16; \ - cfi_restore (x1); \ - cfi_adjust_cfa_offset (-16); \ - UNDOCARGS_1 - -# define DOCARGS_3 \ - DOCARGS_2; \ - str x2, [sp, -16]!; \ - cfi_adjust_cfa_offset (16); \ - cfi_rel_offset (x2, 0) - -# define UNDOCARGS_3 \ - ldr x2, [sp], 16; \ - cfi_restore (x2); \ - cfi_adjust_cfa_offset (-16); \ - UNDOCARGS_2 - -# define DOCARGS_4 \ - DOCARGS_3; \ - str x3, [sp, -16]!; \ - cfi_adjust_cfa_offset (16); \ - cfi_rel_offset (x3, 0) - -# define UNDOCARGS_4 \ - ldr x3, [sp], 16; \ - cfi_restore (x3); \ - cfi_adjust_cfa_offset (-16); \ - UNDOCARGS_3 - -# define DOCARGS_5 \ - DOCARGS_4; \ - str x4, [sp, -16]!; \ - cfi_adjust_cfa_offset (16); \ - cfi_rel_offset (x4, 0) - -# define UNDOCARGS_5 \ - ldr x4, [sp], 16; \ - cfi_restore (x4); \ - cfi_adjust_cfa_offset (-16); \ - UNDOCARGS_4 - -# define DOCARGS_6 \ - DOCARGS_5; \ - str 
x5, [sp, -16]!; \ - cfi_adjust_cfa_offset (16); \ - cfi_rel_offset (x5, 0) - -# define UNDOCARGS_6 \ - ldr x5, [sp], 16; \ - cfi_restore (x5); \ - cfi_adjust_cfa_offset (-16); \ - UNDOCARGS_5 +# define DOCARGS_1 str x0, [sp, 16] +# define UNDOCARGS_1 ldr x0, [sp, 16] + +# define DOCARGS_2 stp x0, x1, [sp, 16] +# define UNDOCARGS_2 ldp x0, x1, [sp, 16] + +# define DOCARGS_3 DOCARGS_2; str x2, [sp, 32] +# define UNDOCARGS_3 UNDOCARGS_2; ldr x2, [sp, 32] + +# define DOCARGS_4 DOCARGS_2; stp x2, x3, [sp, 32] +# define UNDOCARGS_4 UNDOCARGS_2; ldp x2, x3, [sp, 32] + +# define DOCARGS_5 DOCARGS_4; str x4, [sp, 48] +# define UNDOCARGS_5 UNDOCARGS_4; ldr x4, [sp, 48] + +# define DOCARGS_6 DOCARGS_4; stp x4, x5, [sp, 48] +# define UNDOCARGS_6 UNDOCARGS_4; ldp x4, x5, [sp, 48] # ifdef IS_IN_libpthread # define CENABLE bl __pthread_enable_asynccancel @@ -162,10 +107,9 @@ extern int __local_multiple_threads attribute_hidden; # define SINGLE_THREAD_P __builtin_expect (__local_multiple_threads == 0, 1) # else -# define SINGLE_THREAD_P \ - adrp x16, __local_multiple_threads; \ - ldr w16, [x16, #:lo12:__local_multiple_threads]; \ - cmp w16, 0; +# define SINGLE_THREAD_P(R) \ + adrp x##R, __local_multiple_threads; \ + ldr w##R, [x##R, #:lo12:__local_multiple_threads] # endif # else /* There is no __local_multiple_threads for librt, so use the TCB. 
*/ @@ -174,20 +118,18 @@ extern int __local_multiple_threads attribute_hidden; __builtin_expect (THREAD_GETMEM (THREAD_SELF, \ header.multiple_threads) == 0, 1) # else -# define SINGLE_THREAD_P \ +# define SINGLE_THREAD_P(R) \ stp x0, x30, [sp, -16]!; \ cfi_adjust_cfa_offset (16); \ cfi_rel_offset (x0, 0); \ cfi_rel_offset (x30, 8); \ bl __read_tp; \ sub x0, x0, PTHREAD_SIZEOF; \ - ldr w16, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET]; \ + ldr w##R, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET]; \ ldp x0, x30, [sp], 16; \ cfi_restore (x0); \ cfi_restore (x30); \ - cfi_adjust_cfa_offset (-16); \ - cmp w16, 0 -# define SINGLE_THREAD_P_PIC(x) SINGLE_THREAD_P + cfi_adjust_cfa_offset (-16) # endif # endif
From: Richard Henderson <rth@redhat.com> Use a constant frame size, rather than pushing/popping for every saved register. Use stp, ldp, cbz. Share code with the _nocancel path. * sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h (PSEUDO): Use ENTRY for _nocancel entry point. Reuse pieces of _nocancel entry point for implementing the cancel path. Simplify cancel path frame setup. Use cbz instead of cmp+bne for singlethread path. (DOCARGS_2, UNDOCARGS_2): Use stp/ldp. (DOCARGS_4, UNDOCARGS_4, DOCARGS_6, UNDOCARGS_6): Likewise. (SINGLE_THREAD_P) [ASM]: Take a register number in which to return the result. --- .../unix/sysv/linux/aarch64/nptl/sysdep-cancel.h | 186 +++++++-------------- 1 file changed, 64 insertions(+), 122 deletions(-)