Message ID | 57850A82.3080000@linaro.org |
---|---|
State | New |
Headers | show |
On Tue, Jul 12, 2016 at 8:19 AM, Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > > > On 12/07/2016 16:03, H.J. Lu wrote: >> On Tue, Jul 12, 2016 at 7:04 AM, Adhemerval Zanella >> <adhemerval.zanella@linaro.org> wrote: >>> >>> >>> On 12/07/2016 14:26, H.J. Lu wrote: >>>> 64-bit off_t in pread64, preadv, pwrite64 and pwritev syscalls is pased >>>> in one 64-bit register for both x32 and x86-64. Since the inline >>>> asm statement only passes long, which is 32-bit for x32, in registers, >>>> 64-bit off_t is truncated to 32-bit on x32. Since __ASSUME_PREADV and >>>> __ASSUME_PWRITEV are defined unconditionally, these syscalls can be >>>> implemented in syscalls.list to pass 64-bit off_t in one 64-bit register. >>>> >>>> Tested on x86-64 and x32 with off_t > 4GB on pread64/pwrite64 and >>>> preadv64/pwritev64. >>>> >>>> OK for master? >>>> >>>> H.J. >>>> --- >>>> [BZ #20348] >>>> * sysdeps/unix/sysv/linux/x86_64/syscalls.list: Add pread64, >>>> preadv64, pwrite64 and pwritev64. >>>> --- >>>> sysdeps/unix/sysv/linux/x86_64/syscalls.list | 4 ++++ >>>> 1 file changed, 4 insertions(+) >>>> >>>> diff --git a/sysdeps/unix/sysv/linux/x86_64/syscalls.list b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>> index d09d101..bcf6370 100644 >>>> --- a/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>> +++ b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>> @@ -6,6 +6,10 @@ msgctl - msgctl i:iip __msgctl msgctl >>>> msgget - msgget i:ii __msgget msgget >>>> msgrcv - msgrcv Ci:ibnii __msgrcv msgrcv >>>> msgsnd - msgsnd Ci:ibni __msgsnd msgsnd >>>> +pread64 - pread64 Ci:ipii __libc_pread __libc_pread64 __pread64 pread64 __pread pread >>>> +preadv64 - preadv Ci:ipii preadv64 preadv >>>> +pwrite64 - pwrite64 Ci:ipii __libc_pwrite __pwrite64 pwrite64 __pwrite pwrite >>>> +pwritev64 - pwritev Ci:ipii pwritev64 pwritev >>>> shmat - shmat i:ipi __shmat shmat >>>> shmctl - shmctl i:iip __shmctl shmctl >>>> shmdt - shmdt i:s __shmdt shmdt >>>> >>> >>> This is pretty much what I 
suggested [1] with the difference that my >>> idea is to re-add the auto-generated wrappers just for x32. I would >>> prefer to keep x86_64 continue to use default implementation and >>> work on fix {INLINE,INTERNAL}_SYSCALL to work with 64-bit arguments >>> in x32. >> >> syscalls.list is the preferred way to implement a system call, not >> {INLINE,INTERNAL}_SYSCALL. There is no reason only to do it >> for x32. As for {INLINE,INTERNAL}_SYSCALL with 64-bit argument, >> they are only used in p{read,write}[v]64 and it is incorrect to pass long >> as 64-bit integer to x32 syscall if the argument is long or pointer. > > The idea I am trying to push with all these consolidation are twofold: > > 1. Remove the complexity implementation files and way to call syscalls > inside GLIBC and make easier to implement new ports That is fine. > 2. Remove the redundant sysdep-cancel.h requirement for each port > which basically implementations pic/nopic function calls in assembly. > This is also remove implementation complexity and make easier for > new port implementation. > > Also, for 2. it also helps the long standing pthread cancellation > (bz#12683) by focusing all cancellation calls in only one implementation. > > I do get the idea the auto-generation call is currently preferred way > to implementation syscalls, but I think for *cancellable* way we should > push to implement using SYSCALL_CANCEL (which is in turn based on > INTERNAL_SYSCALL). That is fine also. But on x86-64, we should use syscalls.list if possible, especially for cancellation calls. 
With {INLINE,INTERNAL}_SYSCALL: 0000000000000000 <__libc_pread>: 0: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 6 <__libc_pread+0x6> 6: 49 89 ca mov %rcx,%r10 9: 85 c0 test %eax,%eax b: 75 2b jne 38 <__libc_pread+0x38> d: 48 63 ff movslq %edi,%rdi 10: b8 11 00 00 00 mov $0x11,%eax 15: 0f 05 syscall 17: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 1d: 77 01 ja 20 <__libc_pread+0x20> 1f: c3 retq 20: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 27 <__libc_pread+0x27> 27: f7 d8 neg %eax 29: 64 89 02 mov %eax,%fs:(%rdx) 2c: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 33: c3 retq 34: 0f 1f 40 00 nopl 0x0(%rax) 38: 41 55 push %r13 3a: 41 54 push %r12 3c: 49 89 cd mov %rcx,%r13 3f: 55 push %rbp 40: 53 push %rbx 41: 49 89 d4 mov %rdx,%r12 44: 48 89 f5 mov %rsi,%rbp 47: 89 fb mov %edi,%ebx 49: 48 83 ec 18 sub $0x18,%rsp 4d: e8 00 00 00 00 callq 52 <__libc_pread+0x52> 52: 4d 89 ea mov %r13,%r10 55: 41 89 c0 mov %eax,%r8d 58: 4c 89 e2 mov %r12,%rdx 5b: 48 89 ee mov %rbp,%rsi 5e: 48 63 fb movslq %ebx,%rdi 61: b8 11 00 00 00 mov $0x11,%eax 66: 0f 05 syscall 68: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 6e: 77 1d ja 8d <__libc_pread+0x8d> 70: 44 89 c7 mov %r8d,%edi 73: 48 89 44 24 08 mov %rax,0x8(%rsp) 78: e8 00 00 00 00 callq 7d <__libc_pread+0x7d> 7d: 48 8b 44 24 08 mov 0x8(%rsp),%rax 82: 48 83 c4 18 add $0x18,%rsp 86: 5b pop %rbx 87: 5d pop %rbp 88: 41 5c pop %r12 8a: 41 5d pop %r13 8c: c3 retq 8d: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 94 <__libc_pread+0x94> 94: f7 d8 neg %eax 96: 64 89 02 mov %eax,%fs:(%rdx) 99: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax a0: eb ce jmp 70 <__libc_pread+0x70> With syscalls.list: Disassembly of section .text: 0000000000000000 <__libc_pread>: 0: 83 3d 00 00 00 00 00 cmpl $0x0,0x0(%rip) # 7 <__libc_pread+0x7> 7: 75 17 jne 20 <__pread64_nocancel+0x17> 0000000000000009 <__pread64_nocancel>: 9: 49 89 ca mov %rcx,%r10 c: b8 11 00 00 00 mov $0x11,%eax 11: 0f 05 syscall 13: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax 19: 0f 83 00 00 
00 00 jae 1f <__pread64_nocancel+0x16> 1f: c3 retq 20: 48 83 ec 08 sub $0x8,%rsp 24: e8 00 00 00 00 callq 29 <__pread64_nocancel+0x20> 29: 48 89 04 24 mov %rax,(%rsp) 2d: 49 89 ca mov %rcx,%r10 30: b8 11 00 00 00 mov $0x11,%eax 35: 0f 05 syscall 37: 48 8b 3c 24 mov (%rsp),%rdi 3b: 48 89 c2 mov %rax,%rdx 3e: e8 00 00 00 00 callq 43 <__pread64_nocancel+0x3a> 43: 48 89 d0 mov %rdx,%rax 46: 48 83 c4 08 add $0x8,%rsp 4a: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax 50: 0f 83 00 00 00 00 jae 56 <__pread64_nocancel+0x4d> 56: c3 retq This one is much better. >> >>> Also, I think we should remove the first try to fix LO_HI_LONG [2], >>> since 64 bits argument does not work correct in x32 anyway. >> >> I think LO_HI_LONG should be defined only if __WORDSIZE != 64 >> and p{read,write}[v]64 should be added to wordsize-64/syscalls.list. > > Indeed this is something I will get back now that I see x32 fails > with current implementation. I got the wrong idea all ILP32 would > use the compat code (as MIPS64n64). > > About the patch, based on current timeframe I think your solution is > the safest one for x86. > > However I do would like to re-enable it on x86, including x32, when 2.25 > opens. The patch below changes slight on how {INLINE,INTERNAL}_SYSCALL > works on x32: int/long/pointer should be passed as before and uint64_t > arguments should be passed as register all well without casting. > If you have time I would like to check if it would be acceptable for > 2.25. It shows no regression on x32, including the tst-preadwrite{64} > testcase you sent earlier: I will give it a try.
On Tue, Jul 12, 2016 at 8:49 AM, H.J. Lu <hjl.tools@gmail.com> wrote: > On Tue, Jul 12, 2016 at 8:19 AM, Adhemerval Zanella > <adhemerval.zanella@linaro.org> wrote: >> >> >> On 12/07/2016 16:03, H.J. Lu wrote: >>> On Tue, Jul 12, 2016 at 7:04 AM, Adhemerval Zanella >>> <adhemerval.zanella@linaro.org> wrote: >>>> >>>> >>>> On 12/07/2016 14:26, H.J. Lu wrote: >>>>> 64-bit off_t in pread64, preadv, pwrite64 and pwritev syscalls is pased >>>>> in one 64-bit register for both x32 and x86-64. Since the inline >>>>> asm statement only passes long, which is 32-bit for x32, in registers, >>>>> 64-bit off_t is truncated to 32-bit on x32. Since __ASSUME_PREADV and >>>>> __ASSUME_PWRITEV are defined unconditionally, these syscalls can be >>>>> implemented in syscalls.list to pass 64-bit off_t in one 64-bit register. >>>>> >>>>> Tested on x86-64 and x32 with off_t > 4GB on pread64/pwrite64 and >>>>> preadv64/pwritev64. >>>>> >>>>> OK for master? >>>>> >>>>> H.J. >>>>> --- >>>>> [BZ #20348] >>>>> * sysdeps/unix/sysv/linux/x86_64/syscalls.list: Add pread64, >>>>> preadv64, pwrite64 and pwritev64. 
>>>>> --- >>>>> sysdeps/unix/sysv/linux/x86_64/syscalls.list | 4 ++++ >>>>> 1 file changed, 4 insertions(+) >>>>> >>>>> diff --git a/sysdeps/unix/sysv/linux/x86_64/syscalls.list b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>> index d09d101..bcf6370 100644 >>>>> --- a/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>> +++ b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>> @@ -6,6 +6,10 @@ msgctl - msgctl i:iip __msgctl msgctl >>>>> msgget - msgget i:ii __msgget msgget >>>>> msgrcv - msgrcv Ci:ibnii __msgrcv msgrcv >>>>> msgsnd - msgsnd Ci:ibni __msgsnd msgsnd >>>>> +pread64 - pread64 Ci:ipii __libc_pread __libc_pread64 __pread64 pread64 __pread pread >>>>> +preadv64 - preadv Ci:ipii preadv64 preadv >>>>> +pwrite64 - pwrite64 Ci:ipii __libc_pwrite __pwrite64 pwrite64 __pwrite pwrite >>>>> +pwritev64 - pwritev Ci:ipii pwritev64 pwritev >>>>> shmat - shmat i:ipi __shmat shmat >>>>> shmctl - shmctl i:iip __shmctl shmctl >>>>> shmdt - shmdt i:s __shmdt shmdt >>>>> >>>> >>>> This is pretty much what I suggested [1] with the difference that my >>>> idea is to re-add the auto-generated wrappers just for x32. I would >>>> prefer to keep x86_64 continue to use default implementation and >>>> work on fix {INLINE,INTERNAL}_SYSCALL to work with 64-bit arguments >>>> in x32. >>> >>> syscalls.list is the preferred way to implement a system call, not >>> {INLINE,INTERNAL}_SYSCALL. There is no reason only to do it >>> for x32. As for {INLINE,INTERNAL}_SYSCALL with 64-bit argument, >>> they are only used in p{read,write}[v]64 and it is incorrect to pass long >>> as 64-bit integer to x32 syscall if the argument is long or pointer. >> >> The idea I am trying to push with all these consolidation are twofold: >> >> 1. Remove the complexity implementation files and way to call syscalls >> inside GLIBC and make easier to implement new ports > > That is fine. > >> 2. 
Remove the redundant sysdep-cancel.h requirement for each port >> which basically implementations pic/nopic function calls in assembly. >> This is also remove implementation complexity and make easier for >> new port implementation. >> >> Also, for 2. it also helps the long standing pthread cancellation >> (bz#12683) by focusing all cancellation calls in only one implementation. >> >> I do get the idea the auto-generation call is currently preferred way >> to implementation syscalls, but I think for *cancellable* way we should >> push to implement using SYSCALL_CANCEL (which is in turn based on >> INTERNAL_SYSCALL). > > That is fine also. But on x86-64, we should use syscalls.list if possible, > especially for cancellation calls. > > With {INLINE,INTERNAL}_SYSCALL: > > 0000000000000000 <__libc_pread>: > 0: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 6 <__libc_pread+0x6> > 6: 49 89 ca mov %rcx,%r10 > 9: 85 c0 test %eax,%eax > b: 75 2b jne 38 <__libc_pread+0x38> > d: 48 63 ff movslq %edi,%rdi > 10: b8 11 00 00 00 mov $0x11,%eax > 15: 0f 05 syscall > 17: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax > 1d: 77 01 ja 20 <__libc_pread+0x20> > 1f: c3 retq > 20: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 27 <__libc_pread+0x27> > 27: f7 d8 neg %eax > 29: 64 89 02 mov %eax,%fs:(%rdx) > 2c: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax > 33: c3 retq > 34: 0f 1f 40 00 nopl 0x0(%rax) > 38: 41 55 push %r13 > 3a: 41 54 push %r12 > 3c: 49 89 cd mov %rcx,%r13 > 3f: 55 push %rbp > 40: 53 push %rbx > 41: 49 89 d4 mov %rdx,%r12 > 44: 48 89 f5 mov %rsi,%rbp > 47: 89 fb mov %edi,%ebx > 49: 48 83 ec 18 sub $0x18,%rsp > 4d: e8 00 00 00 00 callq 52 <__libc_pread+0x52> > 52: 4d 89 ea mov %r13,%r10 > 55: 41 89 c0 mov %eax,%r8d > 58: 4c 89 e2 mov %r12,%rdx > 5b: 48 89 ee mov %rbp,%rsi > 5e: 48 63 fb movslq %ebx,%rdi > 61: b8 11 00 00 00 mov $0x11,%eax > 66: 0f 05 syscall > 68: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax > 6e: 77 1d ja 8d <__libc_pread+0x8d> > 70: 44 89 c7 mov %r8d,%edi > 73: 
48 89 44 24 08 mov %rax,0x8(%rsp) > 78: e8 00 00 00 00 callq 7d <__libc_pread+0x7d> > 7d: 48 8b 44 24 08 mov 0x8(%rsp),%rax > 82: 48 83 c4 18 add $0x18,%rsp > 86: 5b pop %rbx > 87: 5d pop %rbp > 88: 41 5c pop %r12 > 8a: 41 5d pop %r13 > 8c: c3 retq > 8d: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 94 <__libc_pread+0x94> > 94: f7 d8 neg %eax > 96: 64 89 02 mov %eax,%fs:(%rdx) > 99: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax > a0: eb ce jmp 70 <__libc_pread+0x70> > > With syscalls.list: > > Disassembly of section .text: > > 0000000000000000 <__libc_pread>: > 0: 83 3d 00 00 00 00 00 cmpl $0x0,0x0(%rip) # 7 <__libc_pread+0x7> > 7: 75 17 jne 20 <__pread64_nocancel+0x17> > > 0000000000000009 <__pread64_nocancel>: > 9: 49 89 ca mov %rcx,%r10 > c: b8 11 00 00 00 mov $0x11,%eax > 11: 0f 05 syscall > 13: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax > 19: 0f 83 00 00 00 00 jae 1f <__pread64_nocancel+0x16> > 1f: c3 retq > 20: 48 83 ec 08 sub $0x8,%rsp > 24: e8 00 00 00 00 callq 29 <__pread64_nocancel+0x20> > 29: 48 89 04 24 mov %rax,(%rsp) > 2d: 49 89 ca mov %rcx,%r10 > 30: b8 11 00 00 00 mov $0x11,%eax > 35: 0f 05 syscall > 37: 48 8b 3c 24 mov (%rsp),%rdi > 3b: 48 89 c2 mov %rax,%rdx > 3e: e8 00 00 00 00 callq 43 <__pread64_nocancel+0x3a> > 43: 48 89 d0 mov %rdx,%rax > 46: 48 83 c4 08 add $0x8,%rsp > 4a: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax > 50: 0f 83 00 00 00 00 jae 56 <__pread64_nocancel+0x4d> > 56: c3 retq > > This one is much better. > >>> >>>> Also, I think we should remove the first try to fix LO_HI_LONG [2], >>>> since 64 bits argument does not work correct in x32 anyway. >>> >>> I think LO_HI_LONG should be defined only if __WORDSIZE != 64 >>> and p{read,write}[v]64 should be added to wordsize-64/syscalls.list. >> >> Indeed this is something I will get back now that I see x32 fails >> with current implementation. I got the wrong idea all ILP32 would >> use the compat code (as MIPS64n64). 
x86/entry/syscalls/syscall_64.tbl has 17 common pread64 sys_pread64 295 64 preadv sys_preadv 534 x32 preadv compat_sys_preadv64 compat_sys_preadv64 takes 64-bit offset in one piece. >> About the patch, based on current timeframe I think your solution is >> the safest one for x86. I will check it in. >> However I do would like to re-enable it on x86, including x32, when 2.25 >> opens. The patch below changes slightly on how {INLINE,INTERNAL}_SYSCALL >> works on x32: int/long/pointer should be passed as before and uint64_t >> arguments should be passed as register as well without casting. >> If you have time I would like to check if it would be acceptable for >> 2.25. It shows no regression on x32, including the tst-preadwrite{64} >> testcase you sent earlier: > > I will give it a try. I got no regressions on x32. But on x86-64, I got Inconsistency detected by ld.so: dl-tls.c: 493: _dl_allocate_tls_init: Assertion `listp->slotinfo[cnt].gen <= GL(dl_tls_generation)' failed! FAIL: nptl/tst-stack4 It doesn't fail every time. I am wondering if typeof returns the correct type on signed/unsigned integer constants.
On 12/07/2016 17:19, H.J. Lu wrote: > On Tue, Jul 12, 2016 at 8:49 AM, H.J. Lu <hjl.tools@gmail.com> wrote: >> On Tue, Jul 12, 2016 at 8:19 AM, Adhemerval Zanella >> <adhemerval.zanella@linaro.org> wrote: >>> >>> >>> On 12/07/2016 16:03, H.J. Lu wrote: >>>> On Tue, Jul 12, 2016 at 7:04 AM, Adhemerval Zanella >>>> <adhemerval.zanella@linaro.org> wrote: >>>>> >>>>> >>>>> On 12/07/2016 14:26, H.J. Lu wrote: >>>>>> 64-bit off_t in pread64, preadv, pwrite64 and pwritev syscalls is pased >>>>>> in one 64-bit register for both x32 and x86-64. Since the inline >>>>>> asm statement only passes long, which is 32-bit for x32, in registers, >>>>>> 64-bit off_t is truncated to 32-bit on x32. Since __ASSUME_PREADV and >>>>>> __ASSUME_PWRITEV are defined unconditionally, these syscalls can be >>>>>> implemented in syscalls.list to pass 64-bit off_t in one 64-bit register. >>>>>> >>>>>> Tested on x86-64 and x32 with off_t > 4GB on pread64/pwrite64 and >>>>>> preadv64/pwritev64. >>>>>> >>>>>> OK for master? >>>>>> >>>>>> H.J. >>>>>> --- >>>>>> [BZ #20348] >>>>>> * sysdeps/unix/sysv/linux/x86_64/syscalls.list: Add pread64, >>>>>> preadv64, pwrite64 and pwritev64. 
>>>>>> --- >>>>>> sysdeps/unix/sysv/linux/x86_64/syscalls.list | 4 ++++ >>>>>> 1 file changed, 4 insertions(+) >>>>>> >>>>>> diff --git a/sysdeps/unix/sysv/linux/x86_64/syscalls.list b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>> index d09d101..bcf6370 100644 >>>>>> --- a/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>> +++ b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>> @@ -6,6 +6,10 @@ msgctl - msgctl i:iip __msgctl msgctl >>>>>> msgget - msgget i:ii __msgget msgget >>>>>> msgrcv - msgrcv Ci:ibnii __msgrcv msgrcv >>>>>> msgsnd - msgsnd Ci:ibni __msgsnd msgsnd >>>>>> +pread64 - pread64 Ci:ipii __libc_pread __libc_pread64 __pread64 pread64 __pread pread >>>>>> +preadv64 - preadv Ci:ipii preadv64 preadv >>>>>> +pwrite64 - pwrite64 Ci:ipii __libc_pwrite __pwrite64 pwrite64 __pwrite pwrite >>>>>> +pwritev64 - pwritev Ci:ipii pwritev64 pwritev >>>>>> shmat - shmat i:ipi __shmat shmat >>>>>> shmctl - shmctl i:iip __shmctl shmctl >>>>>> shmdt - shmdt i:s __shmdt shmdt >>>>>> >>>>> >>>>> This is pretty much what I suggested [1] with the difference that my >>>>> idea is to re-add the auto-generated wrappers just for x32. I would >>>>> prefer to keep x86_64 continue to use default implementation and >>>>> work on fix {INLINE,INTERNAL}_SYSCALL to work with 64-bit arguments >>>>> in x32. >>>> >>>> syscalls.list is the preferred way to implement a system call, not >>>> {INLINE,INTERNAL}_SYSCALL. There is no reason only to do it >>>> for x32. As for {INLINE,INTERNAL}_SYSCALL with 64-bit argument, >>>> they are only used in p{read,write}[v]64 and it is incorrect to pass long >>>> as 64-bit integer to x32 syscall if the argument is long or pointer. >>> >>> The idea I am trying to push with all these consolidation are twofold: >>> >>> 1. Remove the complexity implementation files and way to call syscalls >>> inside GLIBC and make easier to implement new ports >> >> That is fine. >> >>> 2. 
Remove the redundant sysdep-cancel.h requirement for each port >>> which basically implementations pic/nopic function calls in assembly. >>> This is also remove implementation complexity and make easier for >>> new port implementation. >>> >>> Also, for 2. it also helps the long standing pthread cancellation >>> (bz#12683) by focusing all cancellation calls in only one implementation. >>> >>> I do get the idea the auto-generation call is currently preferred way >>> to implementation syscalls, but I think for *cancellable* way we should >>> push to implement using SYSCALL_CANCEL (which is in turn based on >>> INTERNAL_SYSCALL). >> >> That is fine also. But on x86-64, we should use syscalls.list if possible, >> especially for cancellation calls. >> >> With {INLINE,INTERNAL}_SYSCALL: >> >> 0000000000000000 <__libc_pread>: >> 0: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 6 <__libc_pread+0x6> >> 6: 49 89 ca mov %rcx,%r10 >> 9: 85 c0 test %eax,%eax >> b: 75 2b jne 38 <__libc_pread+0x38> >> d: 48 63 ff movslq %edi,%rdi >> 10: b8 11 00 00 00 mov $0x11,%eax >> 15: 0f 05 syscall >> 17: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax >> 1d: 77 01 ja 20 <__libc_pread+0x20> >> 1f: c3 retq >> 20: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 27 <__libc_pread+0x27> >> 27: f7 d8 neg %eax >> 29: 64 89 02 mov %eax,%fs:(%rdx) >> 2c: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax >> 33: c3 retq >> 34: 0f 1f 40 00 nopl 0x0(%rax) >> 38: 41 55 push %r13 >> 3a: 41 54 push %r12 >> 3c: 49 89 cd mov %rcx,%r13 >> 3f: 55 push %rbp >> 40: 53 push %rbx >> 41: 49 89 d4 mov %rdx,%r12 >> 44: 48 89 f5 mov %rsi,%rbp >> 47: 89 fb mov %edi,%ebx >> 49: 48 83 ec 18 sub $0x18,%rsp >> 4d: e8 00 00 00 00 callq 52 <__libc_pread+0x52> >> 52: 4d 89 ea mov %r13,%r10 >> 55: 41 89 c0 mov %eax,%r8d >> 58: 4c 89 e2 mov %r12,%rdx >> 5b: 48 89 ee mov %rbp,%rsi >> 5e: 48 63 fb movslq %ebx,%rdi >> 61: b8 11 00 00 00 mov $0x11,%eax >> 66: 0f 05 syscall >> 68: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax >> 6e: 77 1d ja 8d 
<__libc_pread+0x8d> >> 70: 44 89 c7 mov %r8d,%edi >> 73: 48 89 44 24 08 mov %rax,0x8(%rsp) >> 78: e8 00 00 00 00 callq 7d <__libc_pread+0x7d> >> 7d: 48 8b 44 24 08 mov 0x8(%rsp),%rax >> 82: 48 83 c4 18 add $0x18,%rsp >> 86: 5b pop %rbx >> 87: 5d pop %rbp >> 88: 41 5c pop %r12 >> 8a: 41 5d pop %r13 >> 8c: c3 retq >> 8d: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 94 <__libc_pread+0x94> >> 94: f7 d8 neg %eax >> 96: 64 89 02 mov %eax,%fs:(%rdx) >> 99: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax >> a0: eb ce jmp 70 <__libc_pread+0x70> >> >> With syscalls.list: >> >> Disassembly of section .text: >> >> 0000000000000000 <__libc_pread>: >> 0: 83 3d 00 00 00 00 00 cmpl $0x0,0x0(%rip) # 7 <__libc_pread+0x7> >> 7: 75 17 jne 20 <__pread64_nocancel+0x17> >> >> 0000000000000009 <__pread64_nocancel>: >> 9: 49 89 ca mov %rcx,%r10 >> c: b8 11 00 00 00 mov $0x11,%eax >> 11: 0f 05 syscall >> 13: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax >> 19: 0f 83 00 00 00 00 jae 1f <__pread64_nocancel+0x16> >> 1f: c3 retq >> 20: 48 83 ec 08 sub $0x8,%rsp >> 24: e8 00 00 00 00 callq 29 <__pread64_nocancel+0x20> >> 29: 48 89 04 24 mov %rax,(%rsp) >> 2d: 49 89 ca mov %rcx,%r10 >> 30: b8 11 00 00 00 mov $0x11,%eax >> 35: 0f 05 syscall >> 37: 48 8b 3c 24 mov (%rsp),%rdi >> 3b: 48 89 c2 mov %rax,%rdx >> 3e: e8 00 00 00 00 callq 43 <__pread64_nocancel+0x3a> >> 43: 48 89 d0 mov %rdx,%rax >> 46: 48 83 c4 08 add $0x8,%rsp >> 4a: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax >> 50: 0f 83 00 00 00 00 jae 56 <__pread64_nocancel+0x4d> >> 56: c3 retq >> >> This one is much better. I think SYSCALL_CANCEL can be slight improved by using INTERNAL_SYSCALL instead of INLINE_SYSCALL and checking the error outside of cancellation handling. I see a slight improvement, however sysdep-cancel.h is still slight better. 
However the idea is I am trying to push in the end is to remove the sysdep-cancel.h because the new cancellation scheme will require it to basically do a function call instead of calling the cancellation enable/disable (which will be also removed). >> >>>> >>>>> Also, I think we should remove the first try to fix LO_HI_LONG [2], >>>>> since 64 bits argument does not work correct in x32 anyway. >>>> >>>> I think LO_HI_LONG should be defined only if __WORDSIZE != 64 >>>> and p{read,write}[v]64 should be added to wordsize-64/syscalls.list. >>> >>> Indeed this is something I will get back now that I see x32 fails >>> with current implementation. I got the wrong idea all ILP32 would >>> use the compat code (as MIPS64n64). > > x86/entry/syscalls/syscall_64.tbl has > > 17 common pread64 sys_pread64 > 295 64 preadv sys_preadv > 534 x32 preadv compat_sys_preadv64 > > compat_sys_preadv64 takes 64-bit offset in one piece. > >>> About the patch, based on current timeframe I think your solution is >>> the safest one for x86. > > I will check it in. > >>> However I do would like to re-enable it on x86, including x32, when 2.25 >>> opens. The patch below changes slight on how {INLINE,INTERNAL}_SYSCALL >>> works on x32: int/long/pointer should be passed as before and uint64_t >>> arguments should be passed as register all well without casting. >>> If you have time I would like to check if it would be acceptable for >>> 2.25. It shows no regression on x32, including the tst-preadwrite{64} >>> testcase you sent earlier: >> >> I will give it a try. > > I got no regressions on x32. But on x86-64, I got > > Inconsistency detected by ld.so: dl-tls.c: 493: _dl_allocate_tls_init: > Assertion `listp->slotinfo[cnt].gen <= GL(dl_tls_generation)' failed! > FAIL: nptl/tst-stack4 > > It doesn't fail every time. I am wondering if typeof returns the > correct type on > signed/unsigned integer constants. > I think the issue is not really related to this patch. 
I also see this very issue in i386, powerpc64le and aarch64 in an inconsistent way Regarding typeof type for signed/unsigned, it looks like gcc gets it right: $ cat test.c #include <stdio.h> unsigned char uc; signed char sc; int foo_1 () { typeof (uc) c = 255; return c > 128; } int foo_2 () { typeof (sc) c = 255; return c > 128; } int main (void) { printf ("%i\n%i\n", foo_1 (), foo_2 ()); return 0; } $ gcc test.c $ ./a.out 1 0
On Wed, Jul 13, 2016 at 8:25 AM, Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > > > On 12/07/2016 17:19, H.J. Lu wrote: >> On Tue, Jul 12, 2016 at 8:49 AM, H.J. Lu <hjl.tools@gmail.com> wrote: >>> On Tue, Jul 12, 2016 at 8:19 AM, Adhemerval Zanella >>> <adhemerval.zanella@linaro.org> wrote: >>>> >>>> >>>> On 12/07/2016 16:03, H.J. Lu wrote: >>>>> On Tue, Jul 12, 2016 at 7:04 AM, Adhemerval Zanella >>>>> <adhemerval.zanella@linaro.org> wrote: >>>>>> >>>>>> >>>>>> On 12/07/2016 14:26, H.J. Lu wrote: >>>>>>> 64-bit off_t in pread64, preadv, pwrite64 and pwritev syscalls is pased >>>>>>> in one 64-bit register for both x32 and x86-64. Since the inline >>>>>>> asm statement only passes long, which is 32-bit for x32, in registers, >>>>>>> 64-bit off_t is truncated to 32-bit on x32. Since __ASSUME_PREADV and >>>>>>> __ASSUME_PWRITEV are defined unconditionally, these syscalls can be >>>>>>> implemented in syscalls.list to pass 64-bit off_t in one 64-bit register. >>>>>>> >>>>>>> Tested on x86-64 and x32 with off_t > 4GB on pread64/pwrite64 and >>>>>>> preadv64/pwritev64. >>>>>>> >>>>>>> OK for master? >>>>>>> >>>>>>> H.J. >>>>>>> --- >>>>>>> [BZ #20348] >>>>>>> * sysdeps/unix/sysv/linux/x86_64/syscalls.list: Add pread64, >>>>>>> preadv64, pwrite64 and pwritev64. 
>>>>>>> --- >>>>>>> sysdeps/unix/sysv/linux/x86_64/syscalls.list | 4 ++++ >>>>>>> 1 file changed, 4 insertions(+) >>>>>>> >>>>>>> diff --git a/sysdeps/unix/sysv/linux/x86_64/syscalls.list b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>>> index d09d101..bcf6370 100644 >>>>>>> --- a/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>>> +++ b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>>> @@ -6,6 +6,10 @@ msgctl - msgctl i:iip __msgctl msgctl >>>>>>> msgget - msgget i:ii __msgget msgget >>>>>>> msgrcv - msgrcv Ci:ibnii __msgrcv msgrcv >>>>>>> msgsnd - msgsnd Ci:ibni __msgsnd msgsnd >>>>>>> +pread64 - pread64 Ci:ipii __libc_pread __libc_pread64 __pread64 pread64 __pread pread >>>>>>> +preadv64 - preadv Ci:ipii preadv64 preadv >>>>>>> +pwrite64 - pwrite64 Ci:ipii __libc_pwrite __pwrite64 pwrite64 __pwrite pwrite >>>>>>> +pwritev64 - pwritev Ci:ipii pwritev64 pwritev >>>>>>> shmat - shmat i:ipi __shmat shmat >>>>>>> shmctl - shmctl i:iip __shmctl shmctl >>>>>>> shmdt - shmdt i:s __shmdt shmdt >>>>>>> >>>>>> >>>>>> This is pretty much what I suggested [1] with the difference that my >>>>>> idea is to re-add the auto-generated wrappers just for x32. I would >>>>>> prefer to keep x86_64 continue to use default implementation and >>>>>> work on fix {INLINE,INTERNAL}_SYSCALL to work with 64-bit arguments >>>>>> in x32. >>>>> >>>>> syscalls.list is the preferred way to implement a system call, not >>>>> {INLINE,INTERNAL}_SYSCALL. There is no reason only to do it >>>>> for x32. As for {INLINE,INTERNAL}_SYSCALL with 64-bit argument, >>>>> they are only used in p{read,write}[v]64 and it is incorrect to pass long >>>>> as 64-bit integer to x32 syscall if the argument is long or pointer. >>>> >>>> The idea I am trying to push with all these consolidation are twofold: >>>> >>>> 1. Remove the complexity implementation files and way to call syscalls >>>> inside GLIBC and make easier to implement new ports >>> >>> That is fine. >>> >>>> 2. 
Remove the redundant sysdep-cancel.h requirement for each port >>>> which basically implementations pic/nopic function calls in assembly. >>>> This is also remove implementation complexity and make easier for >>>> new port implementation. >>>> >>>> Also, for 2. it also helps the long standing pthread cancellation >>>> (bz#12683) by focusing all cancellation calls in only one implementation. >>>> >>>> I do get the idea the auto-generation call is currently preferred way >>>> to implementation syscalls, but I think for *cancellable* way we should >>>> push to implement using SYSCALL_CANCEL (which is in turn based on >>>> INTERNAL_SYSCALL). >>> >>> That is fine also. But on x86-64, we should use syscalls.list if possible, >>> especially for cancellation calls. >>> >>> With {INLINE,INTERNAL}_SYSCALL: >>> >>> 0000000000000000 <__libc_pread>: >>> 0: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 6 <__libc_pread+0x6> >>> 6: 49 89 ca mov %rcx,%r10 >>> 9: 85 c0 test %eax,%eax >>> b: 75 2b jne 38 <__libc_pread+0x38> >>> d: 48 63 ff movslq %edi,%rdi >>> 10: b8 11 00 00 00 mov $0x11,%eax >>> 15: 0f 05 syscall >>> 17: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax >>> 1d: 77 01 ja 20 <__libc_pread+0x20> >>> 1f: c3 retq >>> 20: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 27 <__libc_pread+0x27> >>> 27: f7 d8 neg %eax >>> 29: 64 89 02 mov %eax,%fs:(%rdx) >>> 2c: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax >>> 33: c3 retq >>> 34: 0f 1f 40 00 nopl 0x0(%rax) >>> 38: 41 55 push %r13 >>> 3a: 41 54 push %r12 >>> 3c: 49 89 cd mov %rcx,%r13 >>> 3f: 55 push %rbp >>> 40: 53 push %rbx >>> 41: 49 89 d4 mov %rdx,%r12 >>> 44: 48 89 f5 mov %rsi,%rbp >>> 47: 89 fb mov %edi,%ebx >>> 49: 48 83 ec 18 sub $0x18,%rsp >>> 4d: e8 00 00 00 00 callq 52 <__libc_pread+0x52> >>> 52: 4d 89 ea mov %r13,%r10 >>> 55: 41 89 c0 mov %eax,%r8d >>> 58: 4c 89 e2 mov %r12,%rdx >>> 5b: 48 89 ee mov %rbp,%rsi >>> 5e: 48 63 fb movslq %ebx,%rdi >>> 61: b8 11 00 00 00 mov $0x11,%eax >>> 66: 0f 05 syscall >>> 68: 48 3d 00 f0 ff 
ff cmp $0xfffffffffffff000,%rax >>> 6e: 77 1d ja 8d <__libc_pread+0x8d> >>> 70: 44 89 c7 mov %r8d,%edi >>> 73: 48 89 44 24 08 mov %rax,0x8(%rsp) >>> 78: e8 00 00 00 00 callq 7d <__libc_pread+0x7d> >>> 7d: 48 8b 44 24 08 mov 0x8(%rsp),%rax >>> 82: 48 83 c4 18 add $0x18,%rsp >>> 86: 5b pop %rbx >>> 87: 5d pop %rbp >>> 88: 41 5c pop %r12 >>> 8a: 41 5d pop %r13 >>> 8c: c3 retq >>> 8d: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 94 <__libc_pread+0x94> >>> 94: f7 d8 neg %eax >>> 96: 64 89 02 mov %eax,%fs:(%rdx) >>> 99: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax >>> a0: eb ce jmp 70 <__libc_pread+0x70> >>> >>> With syscalls.list: >>> >>> Disassembly of section .text: >>> >>> 0000000000000000 <__libc_pread>: >>> 0: 83 3d 00 00 00 00 00 cmpl $0x0,0x0(%rip) # 7 <__libc_pread+0x7> >>> 7: 75 17 jne 20 <__pread64_nocancel+0x17> >>> >>> 0000000000000009 <__pread64_nocancel>: >>> 9: 49 89 ca mov %rcx,%r10 >>> c: b8 11 00 00 00 mov $0x11,%eax >>> 11: 0f 05 syscall >>> 13: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax >>> 19: 0f 83 00 00 00 00 jae 1f <__pread64_nocancel+0x16> >>> 1f: c3 retq >>> 20: 48 83 ec 08 sub $0x8,%rsp >>> 24: e8 00 00 00 00 callq 29 <__pread64_nocancel+0x20> >>> 29: 48 89 04 24 mov %rax,(%rsp) >>> 2d: 49 89 ca mov %rcx,%r10 >>> 30: b8 11 00 00 00 mov $0x11,%eax >>> 35: 0f 05 syscall >>> 37: 48 8b 3c 24 mov (%rsp),%rdi >>> 3b: 48 89 c2 mov %rax,%rdx >>> 3e: e8 00 00 00 00 callq 43 <__pread64_nocancel+0x3a> >>> 43: 48 89 d0 mov %rdx,%rax >>> 46: 48 83 c4 08 add $0x8,%rsp >>> 4a: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax >>> 50: 0f 83 00 00 00 00 jae 56 <__pread64_nocancel+0x4d> >>> 56: c3 retq >>> >>> This one is much better. > > I think SYSCALL_CANCEL can be slight improved by using INTERNAL_SYSCALL > instead of INLINE_SYSCALL and checking the error outside of cancellation > handling. I see a slight improvement, however sysdep-cancel.h is > still slight better. 
> > However the idea is I am trying to push in the end is to remove the > sysdep-cancel.h because the new cancellation scheme will require it > to basically do a function call instead of calling the cancellation > enable/disable (which will be also removed). > >>> >>>>> >>>>>> Also, I think we should remove the first try to fix LO_HI_LONG [2], >>>>>> since 64 bits argument does not work correct in x32 anyway. >>>>> >>>>> I think LO_HI_LONG should be defined only if __WORDSIZE != 64 >>>>> and p{read,write}[v]64 should be added to wordsize-64/syscalls.list. >>>> >>>> Indeed this is something I will get back now that I see x32 fails >>>> with current implementation. I got the wrong idea all ILP32 would >>>> use the compat code (as MIPS64n64). >> >> x86/entry/syscalls/syscall_64.tbl has >> >> 17 common pread64 sys_pread64 >> 295 64 preadv sys_preadv >> 534 x32 preadv compat_sys_preadv64 >> >> compat_sys_preadv64 takes 64-bit offset in one piece. >> >>>> About the patch, based on current timeframe I think your solution is >>>> the safest one for x86. >> >> I will check it in. >> >>>> However I do would like to re-enable it on x86, including x32, when 2.25 >>>> opens. The patch below changes slight on how {INLINE,INTERNAL}_SYSCALL >>>> works on x32: int/long/pointer should be passed as before and uint64_t >>>> arguments should be passed as register all well without casting. >>>> If you have time I would like to check if it would be acceptable for >>>> 2.25. It shows no regression on x32, including the tst-preadwrite{64} >>>> testcase you sent earlier: >>> >>> I will give it a try. >> >> I got no regressions on x32. But on x86-64, I got >> >> Inconsistency detected by ld.so: dl-tls.c: 493: _dl_allocate_tls_init: >> Assertion `listp->slotinfo[cnt].gen <= GL(dl_tls_generation)' failed! >> FAIL: nptl/tst-stack4 >> >> It doesn't fail every time. I am wondering if typeof returns the >> correct type on >> signed/unsigned integer constants. 
>> > > I think the issue is not really related to this patch. I also see this > very issue in i386, powerpc64le and aarch64 in a inconsistent way > > Regarding typeof type for signed/unsigned, it looks like gcc gets it > right: > > $ cat test.c > #include <stdio.h> > > unsigned char uc; > signed char sc; > > int foo_1 () > { > typeof (uc) c = 255; > return c > 128; > } > > int foo_2 () > { > typeof (sc) c = 255; > return c > 128; > } > > int > main (void) > { > printf ("%i\n%i\n", foo_1 (), foo_2 ()); > return 0; > } > $ gcc test.c > $ ./a.out > 1 > 0 > That is not what I meant: hjl@gnu-6 tmp]$ cat y.c #define NUM (-214) typedef typeof (NUM) t1; t1 foo1 () { return NUM; } typedef long int t2; t2 foo2 () { return NUM; } [hjl@gnu-6 tmp]$ gcc -S -O2 y.c [hjl@gnu-6 tmp]$ cat y.s .file "y.c" .text .p2align 4,,15 .globl foo1 .type foo1, @function foo1: .LFB0: .cfi_startproc movl $-214, %eax ^^^^^^^^^^^^^^^^^^^^ ret .cfi_endproc .LFE0: .size foo1, .-foo1 .p2align 4,,15 .globl foo2 .type foo2, @function foo2: .LFB1: .cfi_startproc movq $-214, %rax ^^^^^^^^^^^^^^^^^^^^^ ret .cfi_endproc .LFE1: .size foo2, .-foo2 .ident "GCC: (GNU) 6.1.1 20160621 (Red Hat 6.1.1-3)" .section .note.GNU-stack,"",@progbits [hjl@gnu-6 tmp]$
On 13/07/2016 18:27, H.J. Lu wrote: > On Wed, Jul 13, 2016 at 8:25 AM, Adhemerval Zanella > <adhemerval.zanella@linaro.org> wrote: >> >> >> On 12/07/2016 17:19, H.J. Lu wrote: >>> On Tue, Jul 12, 2016 at 8:49 AM, H.J. Lu <hjl.tools@gmail.com> wrote: >>>> On Tue, Jul 12, 2016 at 8:19 AM, Adhemerval Zanella >>>> <adhemerval.zanella@linaro.org> wrote: >>>>> >>>>> >>>>> On 12/07/2016 16:03, H.J. Lu wrote: >>>>>> On Tue, Jul 12, 2016 at 7:04 AM, Adhemerval Zanella >>>>>> <adhemerval.zanella@linaro.org> wrote: >>>>>>> >>>>>>> >>>>>>> On 12/07/2016 14:26, H.J. Lu wrote: >>>>>>>> 64-bit off_t in pread64, preadv, pwrite64 and pwritev syscalls is pased >>>>>>>> in one 64-bit register for both x32 and x86-64. Since the inline >>>>>>>> asm statement only passes long, which is 32-bit for x32, in registers, >>>>>>>> 64-bit off_t is truncated to 32-bit on x32. Since __ASSUME_PREADV and >>>>>>>> __ASSUME_PWRITEV are defined unconditionally, these syscalls can be >>>>>>>> implemented in syscalls.list to pass 64-bit off_t in one 64-bit register. >>>>>>>> >>>>>>>> Tested on x86-64 and x32 with off_t > 4GB on pread64/pwrite64 and >>>>>>>> preadv64/pwritev64. >>>>>>>> >>>>>>>> OK for master? >>>>>>>> >>>>>>>> H.J. >>>>>>>> --- >>>>>>>> [BZ #20348] >>>>>>>> * sysdeps/unix/sysv/linux/x86_64/syscalls.list: Add pread64, >>>>>>>> preadv64, pwrite64 and pwritev64. 
>>>>>>>> --- >>>>>>>> sysdeps/unix/sysv/linux/x86_64/syscalls.list | 4 ++++ >>>>>>>> 1 file changed, 4 insertions(+) >>>>>>>> >>>>>>>> diff --git a/sysdeps/unix/sysv/linux/x86_64/syscalls.list b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>>>> index d09d101..bcf6370 100644 >>>>>>>> --- a/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>>>> +++ b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>>>> @@ -6,6 +6,10 @@ msgctl - msgctl i:iip __msgctl msgctl >>>>>>>> msgget - msgget i:ii __msgget msgget >>>>>>>> msgrcv - msgrcv Ci:ibnii __msgrcv msgrcv >>>>>>>> msgsnd - msgsnd Ci:ibni __msgsnd msgsnd >>>>>>>> +pread64 - pread64 Ci:ipii __libc_pread __libc_pread64 __pread64 pread64 __pread pread >>>>>>>> +preadv64 - preadv Ci:ipii preadv64 preadv >>>>>>>> +pwrite64 - pwrite64 Ci:ipii __libc_pwrite __pwrite64 pwrite64 __pwrite pwrite >>>>>>>> +pwritev64 - pwritev Ci:ipii pwritev64 pwritev >>>>>>>> shmat - shmat i:ipi __shmat shmat >>>>>>>> shmctl - shmctl i:iip __shmctl shmctl >>>>>>>> shmdt - shmdt i:s __shmdt shmdt >>>>>>>> >>>>>>> >>>>>>> This is pretty much what I suggested [1] with the difference that my >>>>>>> idea is to re-add the auto-generated wrappers just for x32. I would >>>>>>> prefer to keep x86_64 continue to use default implementation and >>>>>>> work on fix {INLINE,INTERNAL}_SYSCALL to work with 64-bit arguments >>>>>>> in x32. >>>>>> >>>>>> syscalls.list is the preferred way to implement a system call, not >>>>>> {INLINE,INTERNAL}_SYSCALL. There is no reason only to do it >>>>>> for x32. As for {INLINE,INTERNAL}_SYSCALL with 64-bit argument, >>>>>> they are only used in p{read,write}[v]64 and it is incorrect to pass long >>>>>> as 64-bit integer to x32 syscall if the argument is long or pointer. >>>>> >>>>> The idea I am trying to push with all these consolidation are twofold: >>>>> >>>>> 1. Remove the complexity implementation files and way to call syscalls >>>>> inside GLIBC and make easier to implement new ports >>>> >>>> That is fine. 
>>>> >>>>> 2. Remove the redundant sysdep-cancel.h requirement for each port >>>>> which basically implementations pic/nopic function calls in assembly. >>>>> This is also remove implementation complexity and make easier for >>>>> new port implementation. >>>>> >>>>> Also, for 2. it also helps the long standing pthread cancellation >>>>> (bz#12683) by focusing all cancellation calls in only one implementation. >>>>> >>>>> I do get the idea the auto-generation call is currently preferred way >>>>> to implementation syscalls, but I think for *cancellable* way we should >>>>> push to implement using SYSCALL_CANCEL (which is in turn based on >>>>> INTERNAL_SYSCALL). >>>> >>>> That is fine also. But on x86-64, we should use syscalls.list if possible, >>>> especially for cancellation calls. >>>> >>>> With {INLINE,INTERNAL}_SYSCALL: >>>> >>>> 0000000000000000 <__libc_pread>: >>>> 0: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 6 <__libc_pread+0x6> >>>> 6: 49 89 ca mov %rcx,%r10 >>>> 9: 85 c0 test %eax,%eax >>>> b: 75 2b jne 38 <__libc_pread+0x38> >>>> d: 48 63 ff movslq %edi,%rdi >>>> 10: b8 11 00 00 00 mov $0x11,%eax >>>> 15: 0f 05 syscall >>>> 17: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax >>>> 1d: 77 01 ja 20 <__libc_pread+0x20> >>>> 1f: c3 retq >>>> 20: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 27 <__libc_pread+0x27> >>>> 27: f7 d8 neg %eax >>>> 29: 64 89 02 mov %eax,%fs:(%rdx) >>>> 2c: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax >>>> 33: c3 retq >>>> 34: 0f 1f 40 00 nopl 0x0(%rax) >>>> 38: 41 55 push %r13 >>>> 3a: 41 54 push %r12 >>>> 3c: 49 89 cd mov %rcx,%r13 >>>> 3f: 55 push %rbp >>>> 40: 53 push %rbx >>>> 41: 49 89 d4 mov %rdx,%r12 >>>> 44: 48 89 f5 mov %rsi,%rbp >>>> 47: 89 fb mov %edi,%ebx >>>> 49: 48 83 ec 18 sub $0x18,%rsp >>>> 4d: e8 00 00 00 00 callq 52 <__libc_pread+0x52> >>>> 52: 4d 89 ea mov %r13,%r10 >>>> 55: 41 89 c0 mov %eax,%r8d >>>> 58: 4c 89 e2 mov %r12,%rdx >>>> 5b: 48 89 ee mov %rbp,%rsi >>>> 5e: 48 63 fb movslq %ebx,%rdi >>>> 61: b8 11 00 00 
00 mov $0x11,%eax >>>> 66: 0f 05 syscall >>>> 68: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax >>>> 6e: 77 1d ja 8d <__libc_pread+0x8d> >>>> 70: 44 89 c7 mov %r8d,%edi >>>> 73: 48 89 44 24 08 mov %rax,0x8(%rsp) >>>> 78: e8 00 00 00 00 callq 7d <__libc_pread+0x7d> >>>> 7d: 48 8b 44 24 08 mov 0x8(%rsp),%rax >>>> 82: 48 83 c4 18 add $0x18,%rsp >>>> 86: 5b pop %rbx >>>> 87: 5d pop %rbp >>>> 88: 41 5c pop %r12 >>>> 8a: 41 5d pop %r13 >>>> 8c: c3 retq >>>> 8d: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 94 <__libc_pread+0x94> >>>> 94: f7 d8 neg %eax >>>> 96: 64 89 02 mov %eax,%fs:(%rdx) >>>> 99: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax >>>> a0: eb ce jmp 70 <__libc_pread+0x70> >>>> >>>> With syscalls.list: >>>> >>>> Disassembly of section .text: >>>> >>>> 0000000000000000 <__libc_pread>: >>>> 0: 83 3d 00 00 00 00 00 cmpl $0x0,0x0(%rip) # 7 <__libc_pread+0x7> >>>> 7: 75 17 jne 20 <__pread64_nocancel+0x17> >>>> >>>> 0000000000000009 <__pread64_nocancel>: >>>> 9: 49 89 ca mov %rcx,%r10 >>>> c: b8 11 00 00 00 mov $0x11,%eax >>>> 11: 0f 05 syscall >>>> 13: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax >>>> 19: 0f 83 00 00 00 00 jae 1f <__pread64_nocancel+0x16> >>>> 1f: c3 retq >>>> 20: 48 83 ec 08 sub $0x8,%rsp >>>> 24: e8 00 00 00 00 callq 29 <__pread64_nocancel+0x20> >>>> 29: 48 89 04 24 mov %rax,(%rsp) >>>> 2d: 49 89 ca mov %rcx,%r10 >>>> 30: b8 11 00 00 00 mov $0x11,%eax >>>> 35: 0f 05 syscall >>>> 37: 48 8b 3c 24 mov (%rsp),%rdi >>>> 3b: 48 89 c2 mov %rax,%rdx >>>> 3e: e8 00 00 00 00 callq 43 <__pread64_nocancel+0x3a> >>>> 43: 48 89 d0 mov %rdx,%rax >>>> 46: 48 83 c4 08 add $0x8,%rsp >>>> 4a: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax >>>> 50: 0f 83 00 00 00 00 jae 56 <__pread64_nocancel+0x4d> >>>> 56: c3 retq >>>> >>>> This one is much better. >> >> I think SYSCALL_CANCEL can be slight improved by using INTERNAL_SYSCALL >> instead of INLINE_SYSCALL and checking the error outside of cancellation >> handling. 
I see a slight improvement, however sysdep-cancel.h is >> still slight better. >> >> However the idea is I am trying to push in the end is to remove the >> sysdep-cancel.h because the new cancellation scheme will require it >> to basically do a function call instead of calling the cancellation >> enable/disable (which will be also removed). >> >>>> >>>>>> >>>>>>> Also, I think we should remove the first try to fix LO_HI_LONG [2], >>>>>>> since 64 bits argument does not work correct in x32 anyway. >>>>>> >>>>>> I think LO_HI_LONG should be defined only if __WORDSIZE != 64 >>>>>> and p{read,write}[v]64 should be added to wordsize-64/syscalls.list. >>>>> >>>>> Indeed this is something I will get back now that I see x32 fails >>>>> with current implementation. I got the wrong idea all ILP32 would >>>>> use the compat code (as MIPS64n64). >>> >>> x86/entry/syscalls/syscall_64.tbl has >>> >>> 17 common pread64 sys_pread64 >>> 295 64 preadv sys_preadv >>> 534 x32 preadv compat_sys_preadv64 >>> >>> compat_sys_preadv64 takes 64-bit offset in one piece. >>> >>>>> About the patch, based on current timeframe I think your solution is >>>>> the safest one for x86. >>> >>> I will check it in. >>> >>>>> However I do would like to re-enable it on x86, including x32, when 2.25 >>>>> opens. The patch below changes slight on how {INLINE,INTERNAL}_SYSCALL >>>>> works on x32: int/long/pointer should be passed as before and uint64_t >>>>> arguments should be passed as register all well without casting. >>>>> If you have time I would like to check if it would be acceptable for >>>>> 2.25. It shows no regression on x32, including the tst-preadwrite{64} >>>>> testcase you sent earlier: >>>> >>>> I will give it a try. >>> >>> I got no regressions on x32. But on x86-64, I got >>> >>> Inconsistency detected by ld.so: dl-tls.c: 493: _dl_allocate_tls_init: >>> Assertion `listp->slotinfo[cnt].gen <= GL(dl_tls_generation)' failed! >>> FAIL: nptl/tst-stack4 >>> >>> It doesn't fail every time. 
I am wondering if typeof returns the >>> correct type on >>> signed/unsigned integer constants. >>> >> >> I think the issue is not really related to this patch. I also see this >> very issue in i386, powerpc64le and aarch64 in a inconsistent way >> >> Regarding typeof type for signed/unsigned, it looks like gcc gets it >> right: >> >> $ cat test.c >> #include <stdio.h> >> >> unsigned char uc; >> signed char sc; >> >> int foo_1 () >> { >> typeof (uc) c = 255; >> return c > 128; >> } >> >> int foo_2 () >> { >> typeof (sc) c = 255; >> return c > 128; >> } >> >> int >> main (void) >> { >> printf ("%i\n%i\n", foo_1 (), foo_2 ()); >> return 0; >> } >> $ gcc test.c >> $ ./a.out >> 1 >> 0 >> > > That is not what I meant: > > hjl@gnu-6 tmp]$ cat y.c > #define NUM (-214) > typedef typeof (NUM) t1; > > t1 > foo1 () > { > return NUM; > } > > typedef long int t2; > > t2 > foo2 () > { > return NUM; > } > [hjl@gnu-6 tmp]$ gcc -S -O2 y.c > [hjl@gnu-6 tmp]$ cat y.s > .file "y.c" > .text > .p2align 4,,15 > .globl foo1 > .type foo1, @function > foo1: > .LFB0: > .cfi_startproc > movl $-214, %eax > ^^^^^^^^^^^^^^^^^^^^ > ret > .cfi_endproc > .LFE0: > .size foo1, .-foo1 > .p2align 4,,15 > .globl foo2 > .type foo2, @function > foo2: > .LFB1: > .cfi_startproc > movq $-214, %rax > ^^^^^^^^^^^^^^^^^^^^^ > ret > .cfi_endproc > .LFE1: > .size foo2, .-foo2 > .ident "GCC: (GNU) 6.1.1 20160621 (Red Hat 6.1.1-3)" > .section .note.GNU-stack,"",@progbits > [hjl@gnu-6 tmp]$ > > Right, I see your point now. However do you think this could be an issue with x32 using {INTERNAL,INLINE}_SYSCALL? I see that these are for internal usage, so we can be judicious on how to use and the constraints about them.
On Thu, Jul 14, 2016 at 3:45 AM, Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > > > On 13/07/2016 18:27, H.J. Lu wrote: >> On Wed, Jul 13, 2016 at 8:25 AM, Adhemerval Zanella >> <adhemerval.zanella@linaro.org> wrote: >>> >>> >>> On 12/07/2016 17:19, H.J. Lu wrote: >>>> On Tue, Jul 12, 2016 at 8:49 AM, H.J. Lu <hjl.tools@gmail.com> wrote: >>>>> On Tue, Jul 12, 2016 at 8:19 AM, Adhemerval Zanella >>>>> <adhemerval.zanella@linaro.org> wrote: >>>>>> >>>>>> >>>>>> On 12/07/2016 16:03, H.J. Lu wrote: >>>>>>> On Tue, Jul 12, 2016 at 7:04 AM, Adhemerval Zanella >>>>>>> <adhemerval.zanella@linaro.org> wrote: >>>>>>>> >>>>>>>> >>>>>>>> On 12/07/2016 14:26, H.J. Lu wrote: >>>>>>>>> 64-bit off_t in pread64, preadv, pwrite64 and pwritev syscalls is pased >>>>>>>>> in one 64-bit register for both x32 and x86-64. Since the inline >>>>>>>>> asm statement only passes long, which is 32-bit for x32, in registers, >>>>>>>>> 64-bit off_t is truncated to 32-bit on x32. Since __ASSUME_PREADV and >>>>>>>>> __ASSUME_PWRITEV are defined unconditionally, these syscalls can be >>>>>>>>> implemented in syscalls.list to pass 64-bit off_t in one 64-bit register. >>>>>>>>> >>>>>>>>> Tested on x86-64 and x32 with off_t > 4GB on pread64/pwrite64 and >>>>>>>>> preadv64/pwritev64. >>>>>>>>> >>>>>>>>> OK for master? >>>>>>>>> >>>>>>>>> H.J. >>>>>>>>> --- >>>>>>>>> [BZ #20348] >>>>>>>>> * sysdeps/unix/sysv/linux/x86_64/syscalls.list: Add pread64, >>>>>>>>> preadv64, pwrite64 and pwritev64. 
>>>>>>>>> --- >>>>>>>>> sysdeps/unix/sysv/linux/x86_64/syscalls.list | 4 ++++ >>>>>>>>> 1 file changed, 4 insertions(+) >>>>>>>>> >>>>>>>>> diff --git a/sysdeps/unix/sysv/linux/x86_64/syscalls.list b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>>>>> index d09d101..bcf6370 100644 >>>>>>>>> --- a/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>>>>> +++ b/sysdeps/unix/sysv/linux/x86_64/syscalls.list >>>>>>>>> @@ -6,6 +6,10 @@ msgctl - msgctl i:iip __msgctl msgctl >>>>>>>>> msgget - msgget i:ii __msgget msgget >>>>>>>>> msgrcv - msgrcv Ci:ibnii __msgrcv msgrcv >>>>>>>>> msgsnd - msgsnd Ci:ibni __msgsnd msgsnd >>>>>>>>> +pread64 - pread64 Ci:ipii __libc_pread __libc_pread64 __pread64 pread64 __pread pread >>>>>>>>> +preadv64 - preadv Ci:ipii preadv64 preadv >>>>>>>>> +pwrite64 - pwrite64 Ci:ipii __libc_pwrite __pwrite64 pwrite64 __pwrite pwrite >>>>>>>>> +pwritev64 - pwritev Ci:ipii pwritev64 pwritev >>>>>>>>> shmat - shmat i:ipi __shmat shmat >>>>>>>>> shmctl - shmctl i:iip __shmctl shmctl >>>>>>>>> shmdt - shmdt i:s __shmdt shmdt >>>>>>>>> >>>>>>>> >>>>>>>> This is pretty much what I suggested [1] with the difference that my >>>>>>>> idea is to re-add the auto-generated wrappers just for x32. I would >>>>>>>> prefer to keep x86_64 continue to use default implementation and >>>>>>>> work on fix {INLINE,INTERNAL}_SYSCALL to work with 64-bit arguments >>>>>>>> in x32. >>>>>>> >>>>>>> syscalls.list is the preferred way to implement a system call, not >>>>>>> {INLINE,INTERNAL}_SYSCALL. There is no reason only to do it >>>>>>> for x32. As for {INLINE,INTERNAL}_SYSCALL with 64-bit argument, >>>>>>> they are only used in p{read,write}[v]64 and it is incorrect to pass long >>>>>>> as 64-bit integer to x32 syscall if the argument is long or pointer. >>>>>> >>>>>> The idea I am trying to push with all these consolidation are twofold: >>>>>> >>>>>> 1. 
Remove the complexity implementation files and way to call syscalls >>>>>> inside GLIBC and make easier to implement new ports >>>>> >>>>> That is fine. >>>>> >>>>>> 2. Remove the redundant sysdep-cancel.h requirement for each port >>>>>> which basically implementations pic/nopic function calls in assembly. >>>>>> This is also remove implementation complexity and make easier for >>>>>> new port implementation. >>>>>> >>>>>> Also, for 2. it also helps the long standing pthread cancellation >>>>>> (bz#12683) by focusing all cancellation calls in only one implementation. >>>>>> >>>>>> I do get the idea the auto-generation call is currently preferred way >>>>>> to implementation syscalls, but I think for *cancellable* way we should >>>>>> push to implement using SYSCALL_CANCEL (which is in turn based on >>>>>> INTERNAL_SYSCALL). >>>>> >>>>> That is fine also. But on x86-64, we should use syscalls.list if possible, >>>>> especially for cancellation calls. >>>>> >>>>> With {INLINE,INTERNAL}_SYSCALL: >>>>> >>>>> 0000000000000000 <__libc_pread>: >>>>> 0: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 6 <__libc_pread+0x6> >>>>> 6: 49 89 ca mov %rcx,%r10 >>>>> 9: 85 c0 test %eax,%eax >>>>> b: 75 2b jne 38 <__libc_pread+0x38> >>>>> d: 48 63 ff movslq %edi,%rdi >>>>> 10: b8 11 00 00 00 mov $0x11,%eax >>>>> 15: 0f 05 syscall >>>>> 17: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax >>>>> 1d: 77 01 ja 20 <__libc_pread+0x20> >>>>> 1f: c3 retq >>>>> 20: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 27 <__libc_pread+0x27> >>>>> 27: f7 d8 neg %eax >>>>> 29: 64 89 02 mov %eax,%fs:(%rdx) >>>>> 2c: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax >>>>> 33: c3 retq >>>>> 34: 0f 1f 40 00 nopl 0x0(%rax) >>>>> 38: 41 55 push %r13 >>>>> 3a: 41 54 push %r12 >>>>> 3c: 49 89 cd mov %rcx,%r13 >>>>> 3f: 55 push %rbp >>>>> 40: 53 push %rbx >>>>> 41: 49 89 d4 mov %rdx,%r12 >>>>> 44: 48 89 f5 mov %rsi,%rbp >>>>> 47: 89 fb mov %edi,%ebx >>>>> 49: 48 83 ec 18 sub $0x18,%rsp >>>>> 4d: e8 00 00 00 00 callq 52 
<__libc_pread+0x52> >>>>> 52: 4d 89 ea mov %r13,%r10 >>>>> 55: 41 89 c0 mov %eax,%r8d >>>>> 58: 4c 89 e2 mov %r12,%rdx >>>>> 5b: 48 89 ee mov %rbp,%rsi >>>>> 5e: 48 63 fb movslq %ebx,%rdi >>>>> 61: b8 11 00 00 00 mov $0x11,%eax >>>>> 66: 0f 05 syscall >>>>> 68: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax >>>>> 6e: 77 1d ja 8d <__libc_pread+0x8d> >>>>> 70: 44 89 c7 mov %r8d,%edi >>>>> 73: 48 89 44 24 08 mov %rax,0x8(%rsp) >>>>> 78: e8 00 00 00 00 callq 7d <__libc_pread+0x7d> >>>>> 7d: 48 8b 44 24 08 mov 0x8(%rsp),%rax >>>>> 82: 48 83 c4 18 add $0x18,%rsp >>>>> 86: 5b pop %rbx >>>>> 87: 5d pop %rbp >>>>> 88: 41 5c pop %r12 >>>>> 8a: 41 5d pop %r13 >>>>> 8c: c3 retq >>>>> 8d: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 94 <__libc_pread+0x94> >>>>> 94: f7 d8 neg %eax >>>>> 96: 64 89 02 mov %eax,%fs:(%rdx) >>>>> 99: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax >>>>> a0: eb ce jmp 70 <__libc_pread+0x70> >>>>> >>>>> With syscalls.list: >>>>> >>>>> Disassembly of section .text: >>>>> >>>>> 0000000000000000 <__libc_pread>: >>>>> 0: 83 3d 00 00 00 00 00 cmpl $0x0,0x0(%rip) # 7 <__libc_pread+0x7> >>>>> 7: 75 17 jne 20 <__pread64_nocancel+0x17> >>>>> >>>>> 0000000000000009 <__pread64_nocancel>: >>>>> 9: 49 89 ca mov %rcx,%r10 >>>>> c: b8 11 00 00 00 mov $0x11,%eax >>>>> 11: 0f 05 syscall >>>>> 13: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax >>>>> 19: 0f 83 00 00 00 00 jae 1f <__pread64_nocancel+0x16> >>>>> 1f: c3 retq >>>>> 20: 48 83 ec 08 sub $0x8,%rsp >>>>> 24: e8 00 00 00 00 callq 29 <__pread64_nocancel+0x20> >>>>> 29: 48 89 04 24 mov %rax,(%rsp) >>>>> 2d: 49 89 ca mov %rcx,%r10 >>>>> 30: b8 11 00 00 00 mov $0x11,%eax >>>>> 35: 0f 05 syscall >>>>> 37: 48 8b 3c 24 mov (%rsp),%rdi >>>>> 3b: 48 89 c2 mov %rax,%rdx >>>>> 3e: e8 00 00 00 00 callq 43 <__pread64_nocancel+0x3a> >>>>> 43: 48 89 d0 mov %rdx,%rax >>>>> 46: 48 83 c4 08 add $0x8,%rsp >>>>> 4a: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax >>>>> 50: 0f 83 00 00 00 00 jae 56 <__pread64_nocancel+0x4d> >>>>> 56: 
c3 retq >>>>> >>>>> This one is much better. >>> >>> I think SYSCALL_CANCEL can be slight improved by using INTERNAL_SYSCALL >>> instead of INLINE_SYSCALL and checking the error outside of cancellation >>> handling. I see a slight improvement, however sysdep-cancel.h is >>> still slight better. >>> >>> However the idea is I am trying to push in the end is to remove the >>> sysdep-cancel.h because the new cancellation scheme will require it >>> to basically do a function call instead of calling the cancellation >>> enable/disable (which will be also removed). >>> >>>>> >>>>>>> >>>>>>>> Also, I think we should remove the first try to fix LO_HI_LONG [2], >>>>>>>> since 64 bits argument does not work correct in x32 anyway. >>>>>>> >>>>>>> I think LO_HI_LONG should be defined only if __WORDSIZE != 64 >>>>>>> and p{read,write}[v]64 should be added to wordsize-64/syscalls.list. >>>>>> >>>>>> Indeed this is something I will get back now that I see x32 fails >>>>>> with current implementation. I got the wrong idea all ILP32 would >>>>>> use the compat code (as MIPS64n64). >>>> >>>> x86/entry/syscalls/syscall_64.tbl has >>>> >>>> 17 common pread64 sys_pread64 >>>> 295 64 preadv sys_preadv >>>> 534 x32 preadv compat_sys_preadv64 >>>> >>>> compat_sys_preadv64 takes 64-bit offset in one piece. >>>> >>>>>> About the patch, based on current timeframe I think your solution is >>>>>> the safest one for x86. >>>> >>>> I will check it in. >>>> >>>>>> However I do would like to re-enable it on x86, including x32, when 2.25 >>>>>> opens. The patch below changes slight on how {INLINE,INTERNAL}_SYSCALL >>>>>> works on x32: int/long/pointer should be passed as before and uint64_t >>>>>> arguments should be passed as register all well without casting. >>>>>> If you have time I would like to check if it would be acceptable for >>>>>> 2.25. It shows no regression on x32, including the tst-preadwrite{64} >>>>>> testcase you sent earlier: >>>>> >>>>> I will give it a try. 
>>>> >>>> I got no regressions on x32. But on x86-64, I got >>>> >>>> Inconsistency detected by ld.so: dl-tls.c: 493: _dl_allocate_tls_init: >>>> Assertion `listp->slotinfo[cnt].gen <= GL(dl_tls_generation)' failed! >>>> FAIL: nptl/tst-stack4 >>>> >>>> It doesn't fail every time. I am wondering if typeof returns the >>>> correct type on >>>> signed/unsigned integer constants. >>>> >>> >>> I think the issue is not really related to this patch. I also see this >>> very issue in i386, powerpc64le and aarch64 in a inconsistent way >>> >>> Regarding typeof type for signed/unsigned, it looks like gcc gets it >>> right: >>> >>> $ cat test.c >>> #include <stdio.h> >>> >>> unsigned char uc; >>> signed char sc; >>> >>> int foo_1 () >>> { >>> typeof (uc) c = 255; >>> return c > 128; >>> } >>> >>> int foo_2 () >>> { >>> typeof (sc) c = 255; >>> return c > 128; >>> } >>> >>> int >>> main (void) >>> { >>> printf ("%i\n%i\n", foo_1 (), foo_2 ()); >>> return 0; >>> } >>> $ gcc test.c >>> $ ./a.out >>> 1 >>> 0 >>> >> >> That is not what I meant: >> >> hjl@gnu-6 tmp]$ cat y.c >> #define NUM (-214) >> typedef typeof (NUM) t1; >> >> t1 >> foo1 () >> { >> return NUM; >> } >> >> typedef long int t2; >> >> t2 >> foo2 () >> { >> return NUM; >> } >> [hjl@gnu-6 tmp]$ gcc -S -O2 y.c >> [hjl@gnu-6 tmp]$ cat y.s >> .file "y.c" >> .text >> .p2align 4,,15 >> .globl foo1 >> .type foo1, @function >> foo1: >> .LFB0: >> .cfi_startproc >> movl $-214, %eax >> ^^^^^^^^^^^^^^^^^^^^ >> ret >> .cfi_endproc >> .LFE0: >> .size foo1, .-foo1 >> .p2align 4,,15 >> .globl foo2 >> .type foo2, @function >> foo2: >> .LFB1: >> .cfi_startproc >> movq $-214, %rax >> ^^^^^^^^^^^^^^^^^^^^^ >> ret >> .cfi_endproc >> .LFE1: >> .size foo2, .-foo2 >> .ident "GCC: (GNU) 6.1.1 20160621 (Red Hat 6.1.1-3)" >> .section .note.GNU-stack,"",@progbits >> [hjl@gnu-6 tmp]$ >> >> > > Right, I see your point now. However do you think this could be an issue > with x32 using {INTERNAL,INLINE}_SYSCALL? 
I see that these are for internal > usage, so we can be judicious on how to use and the constraints about them. We need to use long int in {INTERNAL,INLINE}_SYSCALL. So far it isn't a problem since we never inline a syscall with a 64-bit argument. If we do need to do it in the future, we can do something similar to sysdeps/unix/sysv/linux/x86_64/x32/times.c and use long long for the 64-bit argument.
diff --git a/nptl/pthread_cancel.c b/nptl/pthread_cancel.c index 1419baf..3739433 100644 --- a/nptl/pthread_cancel.c +++ b/nptl/pthread_cancel.c @@ -75,8 +75,9 @@ pthread_cancel (pthread_t th) a signal handler. But this is no allowed, pthread_cancel is not guaranteed to be async-safe. */ int val; + pid_t tid = pd->tid; val = INTERNAL_SYSCALL (tgkill, err, 3, - THREAD_GETMEM (THREAD_SELF, pid), pd->tid, + THREAD_GETMEM (THREAD_SELF, pid), tid, SIGCANCEL); if (INTERNAL_SYSCALL_ERROR_P (val, err)) diff --git a/sysdeps/unix/sysv/linux/dl-origin.c b/sysdeps/unix/sysv/linux/dl-origin.c index a560203..bd65c7d 100644 --- a/sysdeps/unix/sysv/linux/dl-origin.c +++ b/sysdeps/unix/sysv/linux/dl-origin.c @@ -31,17 +31,26 @@ the path of the application from the /proc/self/exe symlink. Try this first and fall back on the generic method if necessary. */ +static inline ssize_t +_readlink (const char *pathname, char *buf, size_t bufsize) +{ + INTERNAL_SYSCALL_DECL (err); + + ssize_t ret = INTERNAL_SYSCALL (readlink, err, 3, pathname, buf, bufsize); + if (INTERNAL_SYSCALL_ERROR_P (ret, err)) + return -1; + return ret; +} + const char * _dl_get_origin (void) { char linkval[PATH_MAX]; char *result; - int len; - INTERNAL_SYSCALL_DECL (err); + ssize_t len; - len = INTERNAL_SYSCALL (readlink, err, 3, "/proc/self/exe", linkval, - sizeof (linkval)); - if (! INTERNAL_SYSCALL_ERROR_P (len, err) && len > 0 && linkval[0] != '[') + len = _readlink ("/proc/self/exe", linkval, sizeof linkval); + if (len > 0 && linkval[0] != '[') { /* We can use this value. */ assert (linkval[0] == '/'); diff --git a/sysdeps/unix/sysv/linux/not-cancel.h b/sysdeps/unix/sysv/linux/not-cancel.h index d23136d..0fc4856 100644 --- a/sysdeps/unix/sysv/linux/not-cancel.h +++ b/sysdeps/unix/sysv/linux/not-cancel.h @@ -24,27 +24,42 @@ #include <errno.h> #include <unistd.h> #include <sys/syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/uio.h> +#include <fcntl.h> /* Uncancelable open. 
*/ +static inline int +open_not_cancel (const char *name, int flags, int mode) +{ #ifdef __NR_open -# define open_not_cancel(name, flags, mode) \ - INLINE_SYSCALL (open, 3, name, flags, mode) -# define open_not_cancel_2(name, flags) \ - INLINE_SYSCALL (open, 2, name, flags) + return INLINE_SYSCALL (open, 3, name, flags, mode); #else -# define open_not_cancel(name, flags, mode) \ INLINE_SYSCALL (openat, 4, AT_FDCWD, name, flags, mode) -# define open_not_cancel_2(name, flags) \ - INLINE_SYSCALL (openat, 3, AT_FDCWD, name, flags) #endif +} + +static inline int +open_not_cancel_2 (const char *name, int flags) +{ +#ifdef __NR_open + return INLINE_SYSCALL (open, 2, name, flags); +#else + return INLINE_SYSCALL (openat, 3, AT_FDCWD, name, flags); +#endif +} /* Uncancelable read. */ #define __read_nocancel(fd, buf, len) \ INLINE_SYSCALL (read, 3, fd, buf, len) /* Uncancelable write. */ -#define __write_nocancel(fd, buf, len) \ - INLINE_SYSCALL (write, 3, fd, buf, len) +static inline ssize_t +__write_nocancel(int fd, const void *buf, size_t len) +{ + return INLINE_SYSCALL (write, 3, fd, buf, len); +} /* Uncancelable openat. */ #define openat_not_cancel(fd, fname, oflag, mode) \ @@ -53,8 +68,13 @@ INLINE_SYSCALL (openat, 3, fd, fname, oflag) #define openat64_not_cancel(fd, fname, oflag, mode) \ INLINE_SYSCALL (openat, 4, fd, fname, oflag | O_LARGEFILE, mode) -#define openat64_not_cancel_3(fd, fname, oflag) \ - INLINE_SYSCALL (openat, 3, fd, fname, oflag | O_LARGEFILE) +/*#define openat64_not_cancel_3(fd, fname, oflag) \ + INLINE_SYSCALL (openat, 3, fd, fname, oflag | O_LARGEFILE)*/ +static inline int +openat64_not_cancel_3 (int fd, const char *fname, int oflag) +{ + return INLINE_SYSCALL (openat, 3, fd, fname, oflag | O_LARGEFILE); +} /* Uncancelable close. */ #define __close_nocancel(fd) \ @@ -66,17 +86,23 @@ INTERNAL_SYSCALL (close, err, 1, (fd)); }) /* Uncancelable read. 
*/ -#define read_not_cancel(fd, buf, n) \ - __read_nocancel (fd, buf, n) +static inline ssize_t +read_not_cancel (int fd, void *buf, size_t n) +{ + return __read_nocancel (fd, buf, n); +} /* Uncancelable write. */ #define write_not_cancel(fd, buf, n) \ __write_nocancel (fd, buf, n) /* Uncancelable writev. */ -#define writev_not_cancel_no_status(fd, iov, n) \ - (void) ({ INTERNAL_SYSCALL_DECL (err); \ - INTERNAL_SYSCALL (writev, err, 3, (fd), (iov), (n)); }) +static inline void +writev_not_cancel_no_status (int fd, const struct iovec *iov, int n) +{ + INTERNAL_SYSCALL_DECL (err); + INTERNAL_SYSCALL (writev, err, 3, fd, iov, n); +} /* Uncancelable fcntl. */ #define fcntl_not_cancel(fd, cmd, val) \ diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h index 1a671e1..9c13ca8 100644 --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h @@ -273,9 +273,9 @@ LOAD_REGS_0 # define ASM_ARGS_1 ASM_ARGS_0, "r" (_a1) # define LOAD_ARGS_1(a1) \ - LOAD_ARGS_TYPES_1 (long int, a1) + LOAD_ARGS_TYPES_1 (typeof (a1), a1) # define LOAD_REGS_1 \ - LOAD_REGS_TYPES_1 (long int, a1) + LOAD_REGS_TYPES_1 (typeof (__arg1), a1) # define LOAD_ARGS_TYPES_2(t1, a1, t2, a2) \ t2 __arg2 = (t2) (a2); \ @@ -285,9 +285,9 @@ LOAD_REGS_TYPES_1(t1, a1) # define ASM_ARGS_2 ASM_ARGS_1, "r" (_a2) # define LOAD_ARGS_2(a1, a2) \ - LOAD_ARGS_TYPES_2 (long int, a1, long int, a2) + LOAD_ARGS_TYPES_2 (typeof (a1), a1, typeof (a2), a2) # define LOAD_REGS_2 \ - LOAD_REGS_TYPES_2 (long int, a1, long int, a2) + LOAD_REGS_TYPES_2 (typeof (__arg1), a1, typeof (__arg2), a2) # define LOAD_ARGS_TYPES_3(t1, a1, t2, a2, t3, a3) \ t3 __arg3 = (t3) (a3); \ @@ -297,9 +297,10 @@ LOAD_REGS_TYPES_2(t1, a1, t2, a2) # define ASM_ARGS_3 ASM_ARGS_2, "r" (_a3) # define LOAD_ARGS_3(a1, a2, a3) \ - LOAD_ARGS_TYPES_3 (long int, a1, long int, a2, long int, a3) + LOAD_ARGS_TYPES_3 (typeof (a1), a1, typeof (a2), a2, typeof (a3), a3) # define LOAD_REGS_3 \ - 
LOAD_REGS_TYPES_3 (long int, a1, long int, a2, long int, a3) + LOAD_REGS_TYPES_3 (typeof (__arg1), a1, typeof (__arg2), a2, \ + typeof (__arg3), a3) # define LOAD_ARGS_TYPES_4(t1, a1, t2, a2, t3, a3, t4, a4) \ t4 __arg4 = (t4) (a4); \ @@ -309,11 +310,11 @@ LOAD_REGS_TYPES_3(t1, a2, t2, a2, t3, a3) # define ASM_ARGS_4 ASM_ARGS_3, "r" (_a4) # define LOAD_ARGS_4(a1, a2, a3, a4) \ - LOAD_ARGS_TYPES_4 (long int, a1, long int, a2, long int, a3, \ - long int, a4) + LOAD_ARGS_TYPES_4 (typeof (a1), a1, typeof (a2), a2, typeof (a3), a3, \ + typeof (a4), a4) # define LOAD_REGS_4 \ - LOAD_REGS_TYPES_4 (long int, a1, long int, a2, long int, a3, \ - long int, a4) + LOAD_REGS_TYPES_4 (typeof (__arg1), a1, typeof (__arg2), a2, \ + typeof (__arg3), a3, typeof (__arg4), a4) # define LOAD_ARGS_TYPES_5(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5) \ t5 __arg5 = (t5) (a5); \ @@ -323,11 +324,12 @@ LOAD_REGS_TYPES_4 (t1, a1, t2, a2, t3, a3, t4, a4) # define ASM_ARGS_5 ASM_ARGS_4, "r" (_a5) # define LOAD_ARGS_5(a1, a2, a3, a4, a5) \ - LOAD_ARGS_TYPES_5 (long int, a1, long int, a2, long int, a3, \ - long int, a4, long int, a5) + LOAD_ARGS_TYPES_5 (typeof (a1), a1, typeof (a2), a2, typeof (a3), a3, \ + typeof (a4), a4, typeof (a5), a5) # define LOAD_REGS_5 \ - LOAD_REGS_TYPES_5 (long int, a1, long int, a2, long int, a3, \ - long int, a4, long int, a5) + LOAD_REGS_TYPES_5 (typeof (__arg1), a1, typeof (__arg2), a2, \ + typeof (__arg3), a3, typeof (__arg4), a4, \ + typeof (__arg5), a5) # define LOAD_ARGS_TYPES_6(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5, t6, a6) \ t6 __arg6 = (t6) (a6); \ @@ -337,11 +339,12 @@ LOAD_REGS_TYPES_5 (t1, a1, t2, a2, t3, a3, t4, a4, t5, a5) # define ASM_ARGS_6 ASM_ARGS_5, "r" (_a6) # define LOAD_ARGS_6(a1, a2, a3, a4, a5, a6) \ - LOAD_ARGS_TYPES_6 (long int, a1, long int, a2, long int, a3, \ - long int, a4, long int, a5, long int, a6) + LOAD_ARGS_TYPES_6 (typeof (a1), a1, typeof (a2), a2, typeof (a3), a3, \ + typeof (a4), a4, typeof (a5), a5, typeof (a6), a6) # define 
LOAD_REGS_6 \ - LOAD_REGS_TYPES_6 (long int, a1, long int, a2, long int, a3, \ - long int, a4, long int, a5, long int, a6) + LOAD_REGS_TYPES_6 (typeof (__arg1), a1, typeof (__arg2), a2, \ + typeof (__arg3), a3, typeof (__arg4), a4, \ + typeof (__arg5), a5, typeof (__arg6), a6) #endif /* __ASSEMBLER__ */