Make abort() AS-safe (Bug 26275).

Message ID	20200927141952.121047-1-carlos@redhat.com
State	New
Headers	show Return-Path: <libc-alpha-bounces@sourceware.org> DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 98552386EC3E To: libc-alpha@sourceware.org Subject: [PATCH] Make abort() AS-safe (Bug 26275). Date: Sun, 27 Sep 2020 10:19:52 -0400 Message-Id: <20200927141952.121047-1-carlos@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="US-ASCII" Precedence: list From: Carlos O'Donell via Libc-alpha <libc-alpha@sourceware.org> Reply-To: Carlos O'Donell <carlos@redhat.com> Errors-To: libc-alpha-bounces@sourceware.org Sender: "Libc-alpha" <libc-alpha-bounces@sourceware.org>
Series	Make abort() AS-safe (Bug 26275). \| expand Make abort() AS-safe (Bug 26275).

diff --git a/include/stdlib.h b/include/stdlib.h index ffcefd7b85..a2431f9139 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -315,6 +315,10 @@ extern __typeof (unsetenv) unsetenv attribute_hidden; extern __typeof (__strtoul_internal) __strtoul_internal attribute_hidden; # endif -#endif +/* Coordinate locks with fork to make abort AS-safe. */ +void __abort_fork_lock (void) attribute_hidden; +void __abort_fork_unlock (void) attribute_hidden; + +#endif /* !defined _ISOMAC */ #endif /* include/stdlib.h */ diff --git a/manual/startup.texi b/manual/startup.texi index 21c48cd037..e38cdc0e28 100644 --- a/manual/startup.texi +++ b/manual/startup.texi @@ -992,10 +992,7 @@ for this function is in @file{stdlib.h}. @deftypefun void abort (void) @standards{ISO, stdlib.h} -@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} -@c The implementation takes a recursive lock and attempts to support -@c calls from signal handlers, but if we're in the middle of flushing or -@c using streams, we may encounter them in inconsistent states. +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@aculock{} @acucorrupt{}}} The @code{abort} function causes abnormal program termination. This does not execute cleanup functions registered with @code{atexit} or @code{on_exit}. diff --git a/stdlib/Makefile b/stdlib/Makefile index 4615f6dfe7..cd470e53ac 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -87,7 +87,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ tst-makecontext-align test-bz22786 tst-strtod-nan-sign \ tst-swapcontext1 tst-setcontext4 tst-setcontext5 \ tst-setcontext6 tst-setcontext7 tst-setcontext8 \ - tst-setcontext9 tst-bz20544 + tst-setcontext9 tst-bz20544 tst-threaded-fork-abort \ tests-internal := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \ tst-tls-atexit tst-tls-atexit-nodelete @@ -243,3 +243,5 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3 $(evaluate-test) $(objpfx)tst-makecontext: $(libdl) + +LDLIBS-tst-threaded-fork-abort = $(shared-thread-library) diff --git a/stdlib/abort.c b/stdlib/abort.c index df98782dd7..e7f74319d6 100644 --- a/stdlib/abort.c +++ b/stdlib/abort.c @@ -42,6 +42,17 @@ static int stage; /* We should be prepared for multiple threads trying to run abort. */ __libc_lock_define_initialized_recursive (static, lock); +void +__abort_fork_lock (void) +{ + __libc_lock_lock_recursive (lock); +} + +void +__abort_fork_unlock (void) +{ + __libc_lock_unlock_recursive (lock); +} /* Cause an abnormal program termination with core-dump. */ void diff --git a/stdlib/tst-threaded-fork-abort.c b/stdlib/tst-threaded-fork-abort.c new file mode 100644 index 0000000000..67f0b09086 --- /dev/null +++ b/stdlib/tst-threaded-fork-abort.c @@ -0,0 +1,168 @@ +/* Verify abort is AS-safe (Bug 26275). + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <support/xthread.h> +#include <support/xsignal.h> +#include <support/xunistd.h> +#include <support/support.h> +#include <support/test-driver.h> +#include <support/timespec.h> +#include <support/capture_subprocess.h> + +static void +handle_abort (int sig) +{ + int wstatus; + pid_t ret; + /* The parent has called abort, and now we wait on the child to call + abort. If the child never calls abort because the abort is live + locked on the abort lock then the test times out with an error. + If the child calls abort we see that, capture it here, and exit + the test successfully. We loop because we might have called abort + before the child was created and so we have to try again. */ + do + { + ret = wait (&wstatus); + } + while (ret == -1); + + /* The child must have exited with SIGABRT or we fail the test. */ + if (WIFSIGNALED (wstatus)) + if (WTERMSIG (wstatus) == SIGABRT) + _exit (0); + + /* The child exit condition is not what we expected. */ + _exit (1); +} + +static void * +thread_abort (void *closure) +{ + pthread_barrier_t *barrier = (pthread_barrier_t *) closure; + xpthread_barrier_wait (barrier); + /* Attempt to abort. */ + abort (); + return NULL; +} + +static void * +thread_fork (void *closure) +{ + pthread_barrier_t *barrier = (pthread_barrier_t *) closure; + pid_t child; + + /* Bug 26275: In the child the abort hangs. This allows the parent + to wait for the child termination in its own SIGABRT handler. + We want to fork just after the aborting thread has taken the + abort handler lock, but we can't always time it right. */ + xpthread_barrier_wait (barrier); + child = xfork (); + + if (child == 0) + { + /* Reset default disposition for SIGABRT so our own abort will + terminate the child. */ + signal (SIGABRT, SIG_DFL); + /* Without the fix this hangs because the forked child observes + the abort lock as held. */ + abort (); + } + + return NULL; +} + +/* The goal of this test is to call fork from thread B while thread A is + in the process of aborting. Bug 26275 shows that if thread A is + holding the abort lock while thread B forks then thread B will be + unable to abort (livelock on the abort lock). The fix is for + thread B to take the abort lock during fork setup, and then release + it after the fork (in both child and parent). This test is + probabilistic and may not trigger the bug, but we run for long enough + that any issues during this abort/fork sequencing should show up in + long-term testing as this test failing every once in a while. + + On a x86_64 VM (4 vCPU @ 3.5GHz) the test fails once every 100-600 + seconds when run continuosly in a loop. We want this test to fail + with a probability of say 10% so develpers see it at least once in + a one or two week development period, or at least once during the + release window. To achieve that we run for at least 20 seconds, + and set the timeout at 30 seconds (gives time for a last iteration + to complete). */ +static void +test_fork_abort (void *closure __attribute__ ((unused))) +{ + struct sigaction sact; + struct sigaction oact; + pthread_barrier_t barrier; + + /* Wait for both threads to get in the right spot. */ + xpthread_barrier_init (&barrier, NULL, 2); + + /* Handle the abort. */ + sact.sa_handler = handle_abort; + xsigaction (SIGABRT, &sact, &oact); + + /* Start the abort thread. */ + xpthread_create (NULL, thread_abort, &barrier); + + /* Start the fork thread. */ + xpthread_create (NULL, thread_fork, &barrier); + + /* Wait indefinately. We exit via the abort handler. */ + while (1) + pause (); +} + +static int +do_test (void) +{ + struct support_capture_subprocess result; + struct timespec before, after, running, end; + + /* Total runtime is 20 seconds. */ + end = make_timespec (20, 0); + + /* Measure start time. */ + xclock_gettime (CLOCK_MONOTONIC, &before); + + /* Run test for 20 seconds. */ + do + { + result = support_capture_subprocess (test_fork_abort, NULL); + support_capture_subprocess_check (&result, "test_fork_abort", 0, + sc_allow_none); + xclock_gettime (CLOCK_MONOTONIC, &after); + running = timespec_sub (support_timespec_normalize (after), + support_timespec_normalize (before)); + } + while (TIMESPEC_BEFORE (running, end)); + + /* Success. */ + return 0; +} + +#define TIMEOUT 30 +#define TEST_FUNCTION do_test +#include <support/test-driver.c> diff --git a/support/timespec.c b/support/timespec.c index edbdb165ec..3d2cdfca5b 100644 --- a/support/timespec.c +++ b/support/timespec.c @@ -27,18 +27,17 @@ test_timespec_before_impl (const char *file, int line, const struct timespec left, const struct timespec right) { - if (left.tv_sec > right.tv_sec - || (left.tv_sec == right.tv_sec - && left.tv_nsec > right.tv_nsec)) { - support_record_failure (); - const struct timespec diff = timespec_sub (left, right); - printf ("%s:%d: %jd.%09jds not before %jd.%09jds " - "(difference %jd.%09jds)\n", - file, line, - (intmax_t) left.tv_sec, (intmax_t) left.tv_nsec, - (intmax_t) right.tv_sec, (intmax_t) right.tv_nsec, - (intmax_t) diff.tv_sec, (intmax_t) diff.tv_nsec); - } + if (!TIMESPEC_BEFORE(left, right)) + { + support_record_failure (); + const struct timespec diff = timespec_sub (left, right); + printf ("%s:%d: %jd.%09jds not before %jd.%09jds " + "(difference %jd.%09jds)\n", + file, line, + (intmax_t) left.tv_sec, (intmax_t) left.tv_nsec, + (intmax_t) right.tv_sec, (intmax_t) right.tv_nsec, + (intmax_t) diff.tv_sec, (intmax_t) diff.tv_nsec); + } } void diff --git a/support/timespec.h b/support/timespec.h index 1a6775a882..280844f2c0 100644 --- a/support/timespec.h +++ b/support/timespec.h @@ -55,6 +55,11 @@ struct timespec support_timespec_normalize (struct timespec time); int support_timespec_check_in_range (struct timespec expected, struct timespec observed, double lower_bound, double upper_bound); +/* True if left is before right, false otherwise. */ +#define TIMESPEC_BEFORE(left, right) \ + ((left.tv_sec < right.tv_sec) \ + || (left.tv_sec == right.tv_sec \ + && left.tv_nsec < right.tv_nsec)) /* Check that the timespec on the left represents a time before the time on the right. */ diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c index 5091a000e3..8fdc6589e5 100644 --- a/sysdeps/nptl/fork.c +++ b/sysdeps/nptl/fork.c @@ -66,6 +66,11 @@ __libc_fork (void) { _IO_list_lock (); + /* Acquire the abort lock. We need to prevent other threads + from aborting while we fork. We must keep abort AS-safe + and to do that we need to own the lock in the child. */ + __abort_fork_lock (); + /* Acquire malloc locks. This needs to come last because fork handlers may use malloc, and the libio list lock has an indirect malloc dependency as well (via the getdelim @@ -113,6 +118,9 @@ __libc_fork (void) /* Release malloc locks. */ call_function_static_weak (__malloc_fork_unlock_child); + /* Release the abort lock. */ + __abort_fork_unlock (); + /* Reset the file list. These are recursive mutexes. */ fresetlockfiles (); @@ -134,6 +142,9 @@ __libc_fork (void) /* Release malloc locks, parent process variant. */ call_function_static_weak (__malloc_fork_unlock_parent); + /* Release the abort lock. */ + __abort_fork_unlock (); + /* We execute this even if the 'fork' call failed. */ _IO_list_unlock (); }

Make abort() AS-safe (Bug 26275).

Commit Message

Comments

Patch