diff mbox series

[RFC,06/10] hurd: Make sure to not use tcb->self

Message ID 20230517191436.73636-7-bugaevc@gmail.com
State New
Headers show
Series Stack setup & misc fixes for x86_64-gnu | expand

Commit Message

Sergey Bugaev May 17, 2023, 7:14 p.m. UTC
Unlike sigstate->thread, tcb->self did not hold a Mach port reference on
the thread port it names. This means that the port can be deallocated,
and the name reused for something else, without anyone noticing. Using
tcb->self will then lead to port use-after-free.

Fortunately nothing was accessing tcb->self, other than it being
intially set to then-valid thread port name upon TCB initialization. To
assert that this keeps being the case without altering TCB layout,
rename self -> self_do_not_use, and stop initializing it.

Also, do not (re-)allocate a whole separate and unused stack for the
main thread, and just exit __pthread_setup early in this case.

Found upon attempting to use tcb->self and getting unexpected crashes.

Signed-off-by: Sergey Bugaev <bugaevc@gmail.com>
---
 sysdeps/mach/hurd/i386/tls.h         |  3 +--
 sysdeps/mach/hurd/x86/htl/pt-setup.c | 34 ++++++++++------------------
 sysdeps/mach/hurd/x86_64/tls.h       |  3 +--
 3 files changed, 14 insertions(+), 26 deletions(-)

Comments

Samuel Thibault May 17, 2023, 8:59 p.m. UTC | #1
Applied, thanks!

Sergey Bugaev, le mer. 17 mai 2023 22:14:32 +0300, a ecrit:
> Unlike sigstate->thread, tcb->self did not hold a Mach port reference on
> the thread port it names. This means that the port can be deallocated,
> and the name reused for something else, without anyone noticing. Using
> tcb->self will then lead to port use-after-free.
> 
> Fortunately nothing was accessing tcb->self, other than it being
> intially set to then-valid thread port name upon TCB initialization. To
> assert that this keeps being the case without altering TCB layout,
> rename self -> self_do_not_use, and stop initializing it.
> 
> Also, do not (re-)allocate a whole separate and unused stack for the
> main thread, and just exit __pthread_setup early in this case.
> 
> Found upon attempting to use tcb->self and getting unexpected crashes.
> 
> Signed-off-by: Sergey Bugaev <bugaevc@gmail.com>
> ---
>  sysdeps/mach/hurd/i386/tls.h         |  3 +--
>  sysdeps/mach/hurd/x86/htl/pt-setup.c | 34 ++++++++++------------------
>  sysdeps/mach/hurd/x86_64/tls.h       |  3 +--
>  3 files changed, 14 insertions(+), 26 deletions(-)
> 
> diff --git a/sysdeps/mach/hurd/i386/tls.h b/sysdeps/mach/hurd/i386/tls.h
> index e124fb10..ba283008 100644
> --- a/sysdeps/mach/hurd/i386/tls.h
> +++ b/sysdeps/mach/hurd/i386/tls.h
> @@ -32,7 +32,7 @@ typedef struct
>  {
>    void *tcb;			/* Points to this structure.  */
>    dtv_t *dtv;			/* Vector of pointers to TLS data.  */
> -  thread_t self;		/* This thread's control port.  */
> +  thread_t self_do_not_use;	/* This thread's control port.  */
>    int multiple_threads;
>    uintptr_t sysinfo;
>    uintptr_t stack_guard;
> @@ -419,7 +419,6 @@ _hurd_tls_new (thread_t child, tcbhead_t *tcb)
>    HURD_TLS_DESC_DECL (desc, tcb);
>  
>    tcb->tcb = tcb;
> -  tcb->self = child;
>  
>    if (HURD_SEL_LDT (sel))
>      err = __i386_set_ldt (child, sel, &desc, 1);
> diff --git a/sysdeps/mach/hurd/x86/htl/pt-setup.c b/sysdeps/mach/hurd/x86/htl/pt-setup.c
> index 3abd92b2..686124d7 100644
> --- a/sysdeps/mach/hurd/x86/htl/pt-setup.c
> +++ b/sysdeps/mach/hurd/x86/htl/pt-setup.c
> @@ -19,6 +19,7 @@
>  #include <stdint.h>
>  #include <assert.h>
>  #include <mach.h>
> +#include <hurd.h>
>  
>  #include <pt-internal.h>
>  
> @@ -76,35 +77,24 @@ __pthread_setup (struct __pthread *thread,
>  				      void *), void *(*start_routine) (void *),
>  		 void *arg)
>  {
> -  tcbhead_t *tcb;
>    error_t err;
> -  mach_port_t ktid;
>  
> -  thread->mcontext.pc = entry_point;
> -  thread->mcontext.sp = stack_setup (thread, start_routine, arg);
> -
> -  ktid = __mach_thread_self ();
> -  if (thread->kernel_thread == ktid)
> +  if (thread->kernel_thread == hurd_thread_self ())
>      /* Fix up the TCB for the main thread.  The C library has already
>         installed a TCB, which we want to keep using.  This TCB must not
>         be freed so don't register it in the thread structure.  On the
>         other hand, it's not yet possible to reliably release a TCB.
> -       Leave the unused one registered so that it doesn't leak.  The
> -       only thing left to do is to correctly set the `self' member in
> -       the already existing TCB.  */
> -    tcb = THREAD_SELF;
> -  else
> -    {
> -      err = __thread_set_pcsptp (thread->kernel_thread,
> -				 1, thread->mcontext.pc,
> -				 1, thread->mcontext.sp,
> -				 1, thread->tcb);
> -      assert_perror (err);
> -      tcb = thread->tcb;
> -    }
> -  __mach_port_deallocate (__mach_task_self (), ktid);
> +       Leave the unused one registered so that it doesn't leak.  */
> +    return 0;
> +
> +  thread->mcontext.pc = entry_point;
> +  thread->mcontext.sp = stack_setup (thread, start_routine, arg);
>  
> -  tcb->self = thread->kernel_thread;
> +  err = __thread_set_pcsptp (thread->kernel_thread,
> +			     1, thread->mcontext.pc,
> +			     1, thread->mcontext.sp,
> +			     1, thread->tcb);
> +  assert_perror (err);
>  
>    return 0;
>  }
> diff --git a/sysdeps/mach/hurd/x86_64/tls.h b/sysdeps/mach/hurd/x86_64/tls.h
> index 1274723a..35dcef44 100644
> --- a/sysdeps/mach/hurd/x86_64/tls.h
> +++ b/sysdeps/mach/hurd/x86_64/tls.h
> @@ -35,7 +35,7 @@ typedef struct
>  {
>    void *tcb;			/* Points to this structure.  */
>    dtv_t *dtv;			/* Vector of pointers to TLS data.  */
> -  thread_t self;		/* This thread's control port.  */
> +  thread_t self_do_no_use;	/* This thread's control port.  */
>    int __glibc_padding1;
>    int multiple_threads;
>    int gscope_flag;
> @@ -158,7 +158,6 @@ _hurd_tls_new (thread_t child, tcbhead_t *tcb)
>    struct i386_fsgs_base_state state;
>  
>    tcb->tcb = tcb;
> -  tcb->self = child;
>  
>    /* Install the TCB address into FS base.  */
>    state.fs_base = (uintptr_t) tcb;
> -- 
> 2.40.1
> 
>
Joseph Myers May 18, 2023, 6:55 p.m. UTC | #2
I suspect this of causing linknamespace test failures:

Contents of conform/POSIX2008/pthread.h/linknamespace.out:

[initial] pthread_create -> [libpthread.a(pt-create.o)] __pthread_setup -> [libpthread.a(pt-setup.o)] hurd_thread_self

(On x86_64 there's also a localplt test failure: "Extra PLT reference: 
libc.so: hurd_thread_self".  In addition, x86_64 has a c++-types-check 
failure.  Thus, neither Hurd configuration has clean results for 
compilation tests from build-many-glibcs.py at present.)
Sergey Bugaev May 18, 2023, 7:33 p.m. UTC | #3
Hello,

On Thu, May 18, 2023 at 9:55 PM Joseph Myers <joseph@codesourcery.com> wrote:
>
> I suspect this of causing linknamespace test failures:
>
> Contents of conform/POSIX2008/pthread.h/linknamespace.out:
>
> [initial] pthread_create -> [libpthread.a(pt-create.o)] __pthread_setup -> [libpthread.a(pt-setup.o)] hurd_thread_self
>
> (On x86_64 there's also a localplt test failure: "Extra PLT reference:
> libc.so: hurd_thread_self".  In addition, x86_64 has a c++-types-check
> failure.  Thus, neither Hurd configuration has clean results for
> compilation tests from build-many-glibcs.py at present.)

Thank you, and sorry :|

Do I understand it right that this is because of hurd_thread_self
being publicly exported and interposable? A quick grep shows that
nothing else inside glibc was using hurd_thread_self indeed.

Would the right way to resolve this be introducing a hidden
__hurd_thread_self and using that? We could also make it
__mach_thread_self () + __mach_port_deallocate (), but why do +2
syscalls when we already have all the required info in userspace.

What's the C++ type check failure? Surely this patch (nor
2f8ecb58a59eb82c43214d000842d99644a662d1 "hurd: Fix x86_64
_hurd_tls_fork") has modified any public headers?

Sergey
Joseph Myers May 18, 2023, 8:16 p.m. UTC | #4
On Thu, 18 May 2023, Sergey Bugaev via Libc-alpha wrote:

> Hello,
> 
> On Thu, May 18, 2023 at 9:55 PM Joseph Myers <joseph@codesourcery.com> wrote:
> >
> > I suspect this of causing linknamespace test failures:
> >
> > Contents of conform/POSIX2008/pthread.h/linknamespace.out:
> >
> > [initial] pthread_create -> [libpthread.a(pt-create.o)] __pthread_setup -> [libpthread.a(pt-setup.o)] hurd_thread_self
> >
> > (On x86_64 there's also a localplt test failure: "Extra PLT reference:
> > libc.so: hurd_thread_self".  In addition, x86_64 has a c++-types-check
> > failure.  Thus, neither Hurd configuration has clean results for
> > compilation tests from build-many-glibcs.py at present.)
> 
> Thank you, and sorry :|
> 
> Do I understand it right that this is because of hurd_thread_self
> being publicly exported and interposable? A quick grep shows that
> nothing else inside glibc was using hurd_thread_self indeed.
> 
> Would the right way to resolve this be introducing a hidden
> __hurd_thread_self and using that? We could also make it

Yes.

Strictly there are two separate issues: (a) calling an exported symbol 
should be done via a hidden alias, to avoid going via the PLT, and (b) 
functions in a standard namespace should not call names in the user's 
namespace, which requires using a name such as __hurd_thread_self instead 
(which should also be marked hidden to make the call optimally efficient).

> What's the C++ type check failure? Surely this patch (nor
> 2f8ecb58a59eb82c43214d000842d99644a662d1 "hurd: Fix x86_64
> _hurd_tls_fork") has modified any public headers?

The C++ type check failure was already present before this patch.

--- sysdeps/mach/hurd/x86_64/c++-types.data     2023-05-02 09:14:30.246903708 +0000
+++ -   2023-05-18 02:08:06.184068438 +0000
@@ -1 +1 @@
-blkcnt64_t:x
+blkcnt64_t:l
@@ -8 +8 @@
-dev_t:j
+dev_t:m
@@ -10 +10 @@
-fsblkcnt64_t:y
+fsblkcnt64_t:m
@@ -12 +12 @@
-fsfilcnt64_t:y
+fsfilcnt64_t:m
@@ -14 +14 @@
-fsid_t:y
+fsid_t:m
@@ -17 +17 @@
-ino64_t:y
+ino64_t:m
@@ -21 +21 @@
-int64_t:x
+int64_t:l
@@ -23 +23 @@
-intptr_t:i
+intptr_t:l
@@ -25 +25 @@
-loff_t:x
+loff_t:l
@@ -27,2 +27,2 @@
-nlink_t:j
-off64_t:x
+nlink_t:m
+off64_t:l
@@ -43,4 +43,4 @@
-pthread_t:i
-quad_t:x
-register_t:i
-rlim64_t:y
+pthread_t:l
+quad_t:l
+register_t:l
+rlim64_t:m
@@ -49 +49 @@
-size_t:j
+size_t:m
@@ -51 +51 @@
-ssize_t:i
+ssize_t:l
@@ -60 +60 @@
-u_int64_t:y
+u_int64_t:m
@@ -64 +64 @@
-u_quad_t:y
+u_quad_t:m
Samuel Thibault May 18, 2023, 11:47 p.m. UTC | #5
Joseph Myers, le jeu. 18 mai 2023 20:16:03 +0000, a ecrit:
> The C++ type check failure was already present before this patch.
> 
> --- sysdeps/mach/hurd/x86_64/c++-types.data     2023-05-02 09:14:30.246903708 +0000
> +++ -   2023-05-18 02:08:06.184068438 +0000
> @@ -1 +1 @@
> -blkcnt64_t:x
> +blkcnt64_t:l

etc.

Uh, I'm confused. I do remember seeing a c++-types test failing and thus
adding the c++ data file, but somehow apparently only added the 32bit
one. And I'm not actually seeing the c++-types failure in my test box,
thus it went unnoticed... I commited the update to 64, thanks!

Samuel
Sergey Bugaev May 19, 2023, 8:22 a.m. UTC | #6
On Thu, May 18, 2023 at 11:16 PM Joseph Myers <joseph@codesourcery.com> wrote:
> Strictly there are two separate issues: (a) calling an exported symbol
> should be done via a hidden alias, to avoid going via the PLT, and (b)
> functions in a standard namespace should not call names in the user's
> namespace, which requires using a name such as __hurd_thread_self instead
> (which should also be marked hidden to make the call optimally efficient).

While we're talking about this, could you please say if my
understanding below is correct (and correct me if not)?

'foo' is a public symbol, to be used by the user, and also
interposable by the user. Always called via PLT (except from inside
the user's code when redefined inside the executable, in which case
the compiler/linker will see that no PLT is needed), and should not be
called from inside glibc.

'__foo' is a (public? private? semi-private?) symbol, the user code is
not supposed to use it, but it's exported from libc.so for the benefit
of other glibc code that ends up in different DSOs and still wants to
call the original version even when 'foo' gets interposed. Invoked via
PLT from other DSOs, not invoked from libc.so because of...

'__GI___foo' is a private non-exported symbol created by the
hidden_def/hidden_proto macro being used on '__foo', this is what the
code in libc.so (or: whatever DSO the symbol is hidden_def'ed in)
calls to avoid PLT jumps.

Q: why '__foo', why not use hidden_def/hidden_proto on the 'foo' directly?
A: that would only work for code that ends up in libc.so (or rather,
the same DSOs as 'foo'), but we still want other code to also call the
non-interposed, original version of 'foo'

Is that ^^ correct? Should each and every function that is exported to
the user and also used inside libc's own code have '__foo' and
'__GI___foo' versions? What does 'GI' even stand for?

Thanks in advance,
Sergey
Florian Weimer May 19, 2023, 9:39 a.m. UTC | #7
* Sergey Bugaev:

> On Thu, May 18, 2023 at 11:16 PM Joseph Myers <joseph@codesourcery.com> wrote:
>> Strictly there are two separate issues: (a) calling an exported symbol
>> should be done via a hidden alias, to avoid going via the PLT, and (b)
>> functions in a standard namespace should not call names in the user's
>> namespace, which requires using a name such as __hurd_thread_self instead
>> (which should also be marked hidden to make the call optimally efficient).
>
> While we're talking about this, could you please say if my
> understanding below is correct (and correct me if not)?
>
> 'foo' is a public symbol, to be used by the user, and also
> interposable by the user. Always called via PLT (except from inside
> the user's code when redefined inside the executable, in which case
> the compiler/linker will see that no PLT is needed), and should not be
> called from inside glibc.
>
> '__foo' is a (public? private? semi-private?) symbol, the user code is
> not supposed to use it, but it's exported from libc.so for the benefit
> of other glibc code that ends up in different DSOs and still wants to
> call the original version even when 'foo' gets interposed. Invoked via
> PLT from other DSOs, not invoked from libc.so because of...

__foo may be exported or not.  We have some __ symbols that are used by
the implementation (so GCC etc.), and probably some that are expected to
be used by users.  Truely exported symbols have a GLIBC_2.y symbol
version, internal exported symbols (usually added for coordination
between different shared objects that make up glibc) have a
GLIBC_PRIVATE symbol version.

Some old internal symbols have GLIBC_2.0 or similar early versions
because GLIBC_PRIVATE did not exist then.

> '__GI___foo' is a private non-exported symbol created by the
> hidden_def/hidden_proto macro being used on '__foo', this is what the
> code in libc.so (or: whatever DSO the symbol is hidden_def'ed in)
> calls to avoid PLT jumps.

Correct.

> Q: why '__foo', why not use hidden_def/hidden_proto on the 'foo' directly?
> A: that would only work for code that ends up in libc.so (or rather,
> the same DSOs as 'foo'), but we still want other code to also call the
> non-interposed, original version of 'foo'

hidden_def/hidden_proto does not do anything for static linking, given
the macros are defined today.

It's of course possible to do all this quite differently.

Thanks,
Florian
Joseph Myers May 19, 2023, 4:50 p.m. UTC | #8
On Fri, 19 May 2023, Sergey Bugaev via Libc-alpha wrote:

> 'foo' is a public symbol, to be used by the user, and also
> interposable by the user. Always called via PLT (except from inside
> the user's code when redefined inside the executable, in which case
> the compiler/linker will see that no PLT is needed), and should not be
> called from inside glibc.

Should not be called from within glibc via the PLT within the same 
library.

It's OK for foo to be called from bar if foo is part of all the standards 
that contain bar.  But in that case, if the call is from the same library, 
*_hidden_def / *_hidden_proto should be used to avoid calling via the PLT.

If foo is not part of all the standards that contain bar, then glibc code 
for bar should use __foo instead to avoid namespace issues, especially for 
static linking.

If __foo is needed by executables, *_nonshared.a or other shared 
libraries, then it also needs to be exported from the library it's in (in 
which case PLT avoidance is also relevant for __foo, so *_hidden_* should 
be used for __foo).

If __foo is only used within a single library and doesn't need to be 
exported, it's OK to declare it directly with attribute_hidden rather than 
using *_hidden_* to create __GI___foo as an alias.  (In this case, if you 
forget to use attribute_hidden, you won't get test failures - the symbol 
in fact won't get exported, because only symbols explicitly listed in the 
version maps get exported, and so it won't get a PLT entry.  But in some 
cases, code generation is more efficient if the compiler knows that the 
symbol is hidden.  So when you're calling an unexported symbol in the same 
library, it's still desirable for it to be declared as hidden in a way 
visible to the compiler.)  The more complicated hidden_proto / hidden_def 
approach also works for unexported symbols used within a single library, 
it's just more complicated than necessary in that case.
diff mbox series

Patch

diff --git a/sysdeps/mach/hurd/i386/tls.h b/sysdeps/mach/hurd/i386/tls.h
index e124fb10..ba283008 100644
--- a/sysdeps/mach/hurd/i386/tls.h
+++ b/sysdeps/mach/hurd/i386/tls.h
@@ -32,7 +32,7 @@  typedef struct
 {
   void *tcb;			/* Points to this structure.  */
   dtv_t *dtv;			/* Vector of pointers to TLS data.  */
-  thread_t self;		/* This thread's control port.  */
+  thread_t self_do_not_use;	/* This thread's control port.  */
   int multiple_threads;
   uintptr_t sysinfo;
   uintptr_t stack_guard;
@@ -419,7 +419,6 @@  _hurd_tls_new (thread_t child, tcbhead_t *tcb)
   HURD_TLS_DESC_DECL (desc, tcb);
 
   tcb->tcb = tcb;
-  tcb->self = child;
 
   if (HURD_SEL_LDT (sel))
     err = __i386_set_ldt (child, sel, &desc, 1);
diff --git a/sysdeps/mach/hurd/x86/htl/pt-setup.c b/sysdeps/mach/hurd/x86/htl/pt-setup.c
index 3abd92b2..686124d7 100644
--- a/sysdeps/mach/hurd/x86/htl/pt-setup.c
+++ b/sysdeps/mach/hurd/x86/htl/pt-setup.c
@@ -19,6 +19,7 @@ 
 #include <stdint.h>
 #include <assert.h>
 #include <mach.h>
+#include <hurd.h>
 
 #include <pt-internal.h>
 
@@ -76,35 +77,24 @@  __pthread_setup (struct __pthread *thread,
 				      void *), void *(*start_routine) (void *),
 		 void *arg)
 {
-  tcbhead_t *tcb;
   error_t err;
-  mach_port_t ktid;
 
-  thread->mcontext.pc = entry_point;
-  thread->mcontext.sp = stack_setup (thread, start_routine, arg);
-
-  ktid = __mach_thread_self ();
-  if (thread->kernel_thread == ktid)
+  if (thread->kernel_thread == hurd_thread_self ())
     /* Fix up the TCB for the main thread.  The C library has already
        installed a TCB, which we want to keep using.  This TCB must not
        be freed so don't register it in the thread structure.  On the
        other hand, it's not yet possible to reliably release a TCB.
-       Leave the unused one registered so that it doesn't leak.  The
-       only thing left to do is to correctly set the `self' member in
-       the already existing TCB.  */
-    tcb = THREAD_SELF;
-  else
-    {
-      err = __thread_set_pcsptp (thread->kernel_thread,
-				 1, thread->mcontext.pc,
-				 1, thread->mcontext.sp,
-				 1, thread->tcb);
-      assert_perror (err);
-      tcb = thread->tcb;
-    }
-  __mach_port_deallocate (__mach_task_self (), ktid);
+       Leave the unused one registered so that it doesn't leak.  */
+    return 0;
+
+  thread->mcontext.pc = entry_point;
+  thread->mcontext.sp = stack_setup (thread, start_routine, arg);
 
-  tcb->self = thread->kernel_thread;
+  err = __thread_set_pcsptp (thread->kernel_thread,
+			     1, thread->mcontext.pc,
+			     1, thread->mcontext.sp,
+			     1, thread->tcb);
+  assert_perror (err);
 
   return 0;
 }
diff --git a/sysdeps/mach/hurd/x86_64/tls.h b/sysdeps/mach/hurd/x86_64/tls.h
index 1274723a..35dcef44 100644
--- a/sysdeps/mach/hurd/x86_64/tls.h
+++ b/sysdeps/mach/hurd/x86_64/tls.h
@@ -35,7 +35,7 @@  typedef struct
 {
   void *tcb;			/* Points to this structure.  */
   dtv_t *dtv;			/* Vector of pointers to TLS data.  */
-  thread_t self;		/* This thread's control port.  */
+  thread_t self_do_no_use;	/* This thread's control port.  */
   int __glibc_padding1;
   int multiple_threads;
   int gscope_flag;
@@ -158,7 +158,6 @@  _hurd_tls_new (thread_t child, tcbhead_t *tcb)
   struct i386_fsgs_base_state state;
 
   tcb->tcb = tcb;
-  tcb->self = child;
 
   /* Install the TCB address into FS base.  */
   state.fs_base = (uintptr_t) tcb;