Message ID | 20230417212034.3890596-3-josimmon@redhat.com |
---|---|
State | New |
Headers | show |
Series | x86_64: aarch64: Set call number just before syscall | expand |
The 04/17/2023 17:20, Joe Simmons-Talbott via Libc-alpha wrote: > To make identifying syscalls easier during call tree analysis load the > syscall number just before performing the syscall. > > Compiler optimizations can place quite a few instructions between the > setting of the syscall number and the syscall instruction. During call > tree analysis the number of instructions between the two can lead to > more difficulty for both tools and humans in properly identifying the > syscall number. Having the syscall number set in the prior instruction > to the syscall instruction makes this task easier and less error prone. > Being able to reliably identify syscalls made by a given API will make > it easier to understand and verify the safety and security of glibc. since the code has !__builtin_constant_p(name) case how would that be handled by these tools? > --- > sysdeps/unix/sysv/linux/aarch64/sysdep.h | 20 +++++++++++++++++--- > 1 file changed, 17 insertions(+), 3 deletions(-) > > diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h > index e94d1703ad..b91656fdff 100644 > --- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h > +++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h > @@ -167,14 +167,28 @@ > > # define HAVE_CLONE3_WRAPPER 1 > > +# define MSTR_HELPER(x) # x > +# define MSTR(x) MSTR_HELPER(x) > + i dont see this used. > # undef INTERNAL_SYSCALL_RAW > # define INTERNAL_SYSCALL_RAW(name, nr, args...) \ > ({ long _sys_result; \ > { \ > LOAD_ARGS_##nr (args) \ > - register long _x8 asm ("x8") = (name); \ > - asm volatile ("svc 0 // syscall " # name \ > - : "=r" (_x0) : "r"(_x8) ASM_ARGS_##nr : "memory"); \ > + if (__builtin_constant_p(name)) \ > + asm volatile ("mov x8, %1\n" \ > + "svc 0 // syscall " # name \ > + : "=r" (_x0) \ > + : "i" (name) ASM_ARGS_##nr \ > + : "x8", "memory"); \ > + else \ > + { \ > + register long _x8 asm ("x8") = (name); \ > + asm volatile ("svc 0 // syscall " # name \ > + : "=r" (_x0) \ > + : "r"(_x8) ASM_ARGS_##nr \ > + : "memory"); \ > + } \ > _sys_result = _x0; \ > } \ > _sys_result; }) i guess this is ok. i would probably move the generated comment to the mov x8,%1 line and remove it in the non-const case. but i rarely look at compiler output.. it seems the only cases when the name is non-const are nptl/nptl_setxid.c: result = INTERNAL_SYSCALL_NCS (xidcmd->syscall_no, 3, xidcmd->id[0], nptl/nptl_setxid.c: result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, 3, which in turn only happens for syscalls sysdeps/unix/sysv/linux/setuid.c: return INLINE_SETXID_SYSCALL (setuid32, 1, uid); sysdeps/unix/sysv/linux/setuid.c: return INLINE_SETXID_SYSCALL (setuid, 1, uid); sysdeps/unix/sysv/linux/setreuid.c: return INLINE_SETXID_SYSCALL (setreuid32, 2, ruid, euid); sysdeps/unix/sysv/linux/setreuid.c: return INLINE_SETXID_SYSCALL (setreuid, 2, ruid, euid); sysdeps/unix/sysv/linux/setegid.c: result = INLINE_SETXID_SYSCALL (setresgid32, 3, -1, gid, -1); sysdeps/unix/sysv/linux/setegid.c: result = INLINE_SETXID_SYSCALL (setresgid, 3, -1, gid, -1); sysdeps/unix/sysv/linux/setregid.c: return INLINE_SETXID_SYSCALL (setregid32, 2, rgid, egid); sysdeps/unix/sysv/linux/setregid.c: return INLINE_SETXID_SYSCALL (setregid, 2, rgid, egid); sysdeps/unix/sysv/linux/setgid.c: return INLINE_SETXID_SYSCALL (setgid32, 1, gid); sysdeps/unix/sysv/linux/setgid.c: return INLINE_SETXID_SYSCALL (setgid, 1, gid); sysdeps/unix/sysv/linux/setgroups.c: return INLINE_SETXID_SYSCALL (setgroups32, 2, n, groups); sysdeps/unix/sysv/linux/setgroups.c: return INLINE_SETXID_SYSCALL (setgroups, 2, n, groups); sysdeps/unix/sysv/linux/setresgid.c: return INLINE_SETXID_SYSCALL (setresgid32, 3, rgid, egid, sgid); sysdeps/unix/sysv/linux/setresgid.c: return INLINE_SETXID_SYSCALL (setresgid, 3, rgid, egid, sgid); sysdeps/unix/sysv/linux/setresuid.c: return INLINE_SETXID_SYSCALL (setresuid32, 3, ruid, euid, suid); sysdeps/unix/sysv/linux/setresuid.c: return INLINE_SETXID_SYSCALL (setresuid, 3, ruid, euid, suid); sysdeps/unix/sysv/linux/seteuid.c: result = INLINE_SETXID_SYSCALL (setresuid32, 3, -1, uid, -1); sysdeps/unix/sysv/linux/seteuid.c: result = INLINE_SETXID_SYSCALL (setresuid, 3, -1, uid, -1); so if we wanted we could have a switch statement in setxid and make all syscalls compile time const (other than explicit external calls to syscall())
On Tue, Apr 18, 2023 at 01:57:28PM +0100, Szabolcs Nagy wrote: > The 04/17/2023 17:20, Joe Simmons-Talbott via Libc-alpha wrote: > > To make identifying syscalls easier during call tree analysis load the > > syscall number just before performing the syscall. > > > > Compiler optimizations can place quite a few instructions between the > > setting of the syscall number and the syscall instruction. During call > > tree analysis the number of instructions between the two can lead to > > more difficulty for both tools and humans in properly identifying the > > syscall number. Having the syscall number set in the prior instruction > > to the syscall instruction makes this task easier and less error prone. > > Being able to reliably identify syscalls made by a given API will make > > it easier to understand and verify the safety and security of glibc. > > since the code has !__builtin_constant_p(name) case > how would that be handled by these tools? > > > --- > > sysdeps/unix/sysv/linux/aarch64/sysdep.h | 20 +++++++++++++++++--- > > 1 file changed, 17 insertions(+), 3 deletions(-) > > > > diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h > > index e94d1703ad..b91656fdff 100644 > > --- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h > > +++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h > > @@ -167,14 +167,28 @@ > > > > # define HAVE_CLONE3_WRAPPER 1 > > > > +# define MSTR_HELPER(x) # x > > +# define MSTR(x) MSTR_HELPER(x) > > + > > i dont see this used. > > > # undef INTERNAL_SYSCALL_RAW > > # define INTERNAL_SYSCALL_RAW(name, nr, args...) \ > > ({ long _sys_result; \ > > { \ > > LOAD_ARGS_##nr (args) \ > > - register long _x8 asm ("x8") = (name); \ > > - asm volatile ("svc 0 // syscall " # name \ > > - : "=r" (_x0) : "r"(_x8) ASM_ARGS_##nr : "memory"); \ > > + if (__builtin_constant_p(name)) \ > > + asm volatile ("mov x8, %1\n" \ > > + "svc 0 // syscall " # name \ > > + : "=r" (_x0) \ > > + : "i" (name) ASM_ARGS_##nr \ > > + : "x8", "memory"); \ > > + else \ > > + { \ > > + register long _x8 asm ("x8") = (name); \ > > + asm volatile ("svc 0 // syscall " # name \ > > + : "=r" (_x0) \ > > + : "r"(_x8) ASM_ARGS_##nr \ > > + : "memory"); \ > > + } \ > > _sys_result = _x0; \ > > } \ > > _sys_result; }) > > i guess this is ok. > > i would probably move the generated comment to the > mov x8,%1 line and remove it in the non-const case. > but i rarely look at compiler output.. > > it seems the only cases when the name is non-const are > > nptl/nptl_setxid.c: result = INTERNAL_SYSCALL_NCS (xidcmd->syscall_no, 3, xidcmd->id[0], > nptl/nptl_setxid.c: result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, 3, > > which in turn only happens for syscalls > > sysdeps/unix/sysv/linux/setuid.c: return INLINE_SETXID_SYSCALL (setuid32, 1, uid); > sysdeps/unix/sysv/linux/setuid.c: return INLINE_SETXID_SYSCALL (setuid, 1, uid); > sysdeps/unix/sysv/linux/setreuid.c: return INLINE_SETXID_SYSCALL (setreuid32, 2, ruid, euid); > sysdeps/unix/sysv/linux/setreuid.c: return INLINE_SETXID_SYSCALL (setreuid, 2, ruid, euid); > sysdeps/unix/sysv/linux/setegid.c: result = INLINE_SETXID_SYSCALL (setresgid32, 3, -1, gid, -1); > sysdeps/unix/sysv/linux/setegid.c: result = INLINE_SETXID_SYSCALL (setresgid, 3, -1, gid, -1); > sysdeps/unix/sysv/linux/setregid.c: return INLINE_SETXID_SYSCALL (setregid32, 2, rgid, egid); > sysdeps/unix/sysv/linux/setregid.c: return INLINE_SETXID_SYSCALL (setregid, 2, rgid, egid); > sysdeps/unix/sysv/linux/setgid.c: return INLINE_SETXID_SYSCALL (setgid32, 1, gid); > sysdeps/unix/sysv/linux/setgid.c: return INLINE_SETXID_SYSCALL (setgid, 1, gid); > sysdeps/unix/sysv/linux/setgroups.c: return INLINE_SETXID_SYSCALL (setgroups32, 2, n, groups); > sysdeps/unix/sysv/linux/setgroups.c: return INLINE_SETXID_SYSCALL (setgroups, 2, n, groups); > sysdeps/unix/sysv/linux/setresgid.c: return INLINE_SETXID_SYSCALL (setresgid32, 3, rgid, egid, sgid); > sysdeps/unix/sysv/linux/setresgid.c: return INLINE_SETXID_SYSCALL (setresgid, 3, rgid, egid, sgid); > sysdeps/unix/sysv/linux/setresuid.c: return INLINE_SETXID_SYSCALL (setresuid32, 3, ruid, euid, suid); > sysdeps/unix/sysv/linux/setresuid.c: return INLINE_SETXID_SYSCALL (setresuid, 3, ruid, euid, suid); > sysdeps/unix/sysv/linux/seteuid.c: result = INLINE_SETXID_SYSCALL (setresuid32, 3, -1, uid, -1); > sysdeps/unix/sysv/linux/seteuid.c: result = INLINE_SETXID_SYSCALL (setresuid, 3, -1, uid, -1); > > so if we wanted we could have a switch statement in setxid > and make all syscalls compile time const (other than > explicit external calls to syscall()) > If we go this route should I include #ifdef guards around each case or just the 32 bit ones? Thanks, Joe
diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h index e94d1703ad..b91656fdff 100644 --- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h +++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h @@ -167,14 +167,28 @@ # define HAVE_CLONE3_WRAPPER 1 +# define MSTR_HELPER(x) # x +# define MSTR(x) MSTR_HELPER(x) + # undef INTERNAL_SYSCALL_RAW # define INTERNAL_SYSCALL_RAW(name, nr, args...) \ ({ long _sys_result; \ { \ LOAD_ARGS_##nr (args) \ - register long _x8 asm ("x8") = (name); \ - asm volatile ("svc 0 // syscall " # name \ - : "=r" (_x0) : "r"(_x8) ASM_ARGS_##nr : "memory"); \ + if (__builtin_constant_p(name)) \ + asm volatile ("mov x8, %1\n" \ + "svc 0 // syscall " # name \ + : "=r" (_x0) \ + : "i" (name) ASM_ARGS_##nr \ + : "x8", "memory"); \ + else \ + { \ + register long _x8 asm ("x8") = (name); \ + asm volatile ("svc 0 // syscall " # name \ + : "=r" (_x0) \ + : "r"(_x8) ASM_ARGS_##nr \ + : "memory"); \ + } \ _sys_result = _x0; \ } \ _sys_result; })