Message ID | dc4a606da98f281312bad7a1018a50a71a0d7def.1700246487.git.fweimer@redhat.com |
---|---|
State | New |
Headers | show |
Series | Various qsort fixes | expand |
On 17/11/23 15:44, Florian Weimer wrote: > The existing logic avoided internal stack overflow. To avoid > a denial-of-service condition with adversarial input, it is necessary > to fall over to heapsort if tail-recursing deeply, too, which does > not result in a deep stack of pending partitions. > > The new test stdlib/tst-qsort5 is based on Douglas McIlroy's paper > on this subject. LGTM, thanks. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> > --- > stdlib/Makefile | 3 + > stdlib/qsort.c | 17 +++-- > stdlib/tst-qsort5.c | 171 ++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 187 insertions(+), 4 deletions(-) > create mode 100644 stdlib/tst-qsort5.c > > diff --git a/stdlib/Makefile b/stdlib/Makefile > index 48688f6a27..6194d1cb22 100644 > --- a/stdlib/Makefile > +++ b/stdlib/Makefile > @@ -215,6 +215,7 @@ tests := \ > tst-qsort \ > tst-qsort2 \ > tst-qsort3 \ > + tst-qsort5 \ > tst-quick_exit \ > tst-rand48 \ > tst-rand48-2 \ > @@ -512,3 +513,5 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3 > '$(run-program-env)' '$(test-program-prefix-after-env)' \ > $(common-objpfx)stdlib/; \ > $(evaluate-test) > + > +$(objpfx)tst-qsort5: $(libm) > diff --git a/stdlib/qsort.c b/stdlib/qsort.c > index a2f9e916ef..be01fb5598 100644 > --- a/stdlib/qsort.c > +++ b/stdlib/qsort.c > @@ -389,14 +389,23 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size, > { > if ((size_t) (hi - left_ptr) <= max_thresh) > /* Ignore both small partitions. */ > - top = pop (top, &lo, &hi, &depth); > + { > + top = pop (top, &lo, &hi, &depth); > + --depth; > + } > else > - /* Ignore small left partition. */ > - lo = left_ptr; > + { > + /* Ignore small left partition. */ > + lo = left_ptr; > + --depth; > + } > } > else if ((size_t) (hi - left_ptr) <= max_thresh) > /* Ignore small right partition. */ > - hi = right_ptr; > + { > + hi = right_ptr; > + --depth; > + } > else if ((right_ptr - lo) > (hi - left_ptr)) > { > /* Push larger left partition indices. */ Ok (kinda embarrassing I missed this obvious depth update...). > diff --git a/stdlib/tst-qsort5.c b/stdlib/tst-qsort5.c > new file mode 100644 > index 0000000000..d3a88c30f8 > --- /dev/null > +++ b/stdlib/tst-qsort5.c > @@ -0,0 +1,171 @@ > +/* Adversarial test for qsort_r. > + Copyright (C) 2023 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +/* The approach follows Douglas McIlroy, A Killer Adversary for > + Quicksort. Software—Practice and Experience 29 (1999) 341-344. > + Downloaded <http://www.cs.dartmouth.edu/~doug/mdmspe.pdf> > + (2023-11-17). */ > + > +#include <math.h> > +#include <stdlib.h> > +#include <stdio.h> > +#include <support/check.h> > +#include <support/support.h> > + > +struct context > +{ > + /* Called the gas value in the paper. This value is larger than all > + other values (length minus one will do), so comparison with any > + decided value has a known result. */ > + int undecided_value; > + > + /* If comparing undecided values, one of them as to be assigned a > + value to ensure consistency with future comparisons. This is the > + value that will be used. Starts out at zero. */ > + int next_decided; > + > + /* Used to trick pivot selection. Deciding the value for the last > + seen undcided value in a decided/undecided comparison happens > + to trick the many qsort implementations. */ > + int last_undecided_index; > + > + /* This array contains the actually asigned values. The call to > + qsort_r sorts a different array that contains indices into this > + array. */ > + int *decided_values; > +}; > + > +static int > +compare_opponent (const void *l1, const void *r1, void *ctx1) > +{ > + const int *l = l1; > + const int *r = r1; > + struct context *ctx = ctx1; > + int rvalue = ctx->decided_values[*r]; > + int lvalue = ctx->decided_values[*l]; > + > + if (lvalue == ctx->undecided_value) > + { > + if (rvalue == ctx->undecided_value) > + { > + /* Both values are undecided. In this case, make a decision > + for the last-used undecided value. This is tweak is very > + specific to quicksort. */ > + if (*l == ctx->last_undecided_index) > + { > + ctx->decided_values[*l] = ctx->next_decided; > + ++ctx->next_decided; > + /* The undecided value or *r is greater. */ > + return -1; > + } > + else > + { > + ctx->decided_values[*r] = ctx->next_decided; > + ++ctx->next_decided; > + /* The undecided value for *l is greater. */ > + return 1; > + } > + } > + else > + { > + ctx->last_undecided_index = *l; > + return 1; > + } > + } > + else > + { > + /* *l is a decided value. */ > + if (rvalue == ctx->undecided_value) > + { > + ctx->last_undecided_index = *r; > + /* The undecided value for *r is greater. */ > + return -1; > + } > + else > + return lvalue - rvalue; > + } > +} > + > +/* Return a pointer to the adversarial permutation of length N. */ > +static int * > +create_permutation (size_t n) > +{ > + struct context ctx = > + { > + .undecided_value = n - 1, /* Larger than all other values. */ > + .decided_values = xcalloc (n, sizeof (int)), > + }; > + for (size_t i = 0; i < n; ++i) > + ctx.decided_values[i] = ctx.undecided_value; > + int *scratch = xcalloc (n, sizeof (int)); > + for (size_t i = 0; i < n; ++i) > + scratch[i] = i; > + qsort_r (scratch, n, sizeof (*scratch), compare_opponent, &ctx); > + free (scratch); > + return ctx.decided_values; > +} > + > +/* Callback function for qsort which counts the number of invocations > + in *CLOSURE. */ > +static int > +compare_counter (const void *l1, const void *r1, void *closure) > +{ > + const int *l = l1; > + const int *r = r1; > + unsigned long long int *counter = closure; > + ++*counter; > + return *l - *r; > +} > + > +/* Count the comparisons required for an adversarial permutation of > + length N. */ > +static unsigned long long int > +count_comparisons (size_t n) > +{ > + int *array = create_permutation (n); > + unsigned long long int counter = 0; > + qsort_r (array, n, sizeof (*array), compare_counter, &counter); > + free (array); > + return counter; > +} > + > +/* Check the scaling factor for one adversarial permutation of length > + N, and report some statistics. */ > +static void > +check_one_n (size_t n) > +{ > + unsigned long long int count = count_comparisons (n); > + double factor = count / (n * log (count)); > + printf ("info: length %zu: %llu comparisons ~ %f * n * log (n)\n", > + n, count, factor); > + /* This is an arbitrary factor which is true for the current > + implementation across a wide range of sizes. */ > + TEST_VERIFY (factor <= 4.5); > +} > + > +static int > +do_test (void) > +{ > + check_one_n (100); > + check_one_n (1000); > + for (int i = 1; i <= 15; ++i) > + check_one_n (i * 10 * 1000); > + return 0; > +} > + > +#include <support/test-driver.c> Ok.
diff --git a/stdlib/Makefile b/stdlib/Makefile index 48688f6a27..6194d1cb22 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -215,6 +215,7 @@ tests := \ tst-qsort \ tst-qsort2 \ tst-qsort3 \ + tst-qsort5 \ tst-quick_exit \ tst-rand48 \ tst-rand48-2 \ @@ -512,3 +513,5 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3 '$(run-program-env)' '$(test-program-prefix-after-env)' \ $(common-objpfx)stdlib/; \ $(evaluate-test) + +$(objpfx)tst-qsort5: $(libm) diff --git a/stdlib/qsort.c b/stdlib/qsort.c index a2f9e916ef..be01fb5598 100644 --- a/stdlib/qsort.c +++ b/stdlib/qsort.c @@ -389,14 +389,23 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size, { if ((size_t) (hi - left_ptr) <= max_thresh) /* Ignore both small partitions. */ - top = pop (top, &lo, &hi, &depth); + { + top = pop (top, &lo, &hi, &depth); + --depth; + } else - /* Ignore small left partition. */ - lo = left_ptr; + { + /* Ignore small left partition. */ + lo = left_ptr; + --depth; + } } else if ((size_t) (hi - left_ptr) <= max_thresh) /* Ignore small right partition. */ - hi = right_ptr; + { + hi = right_ptr; + --depth; + } else if ((right_ptr - lo) > (hi - left_ptr)) { /* Push larger left partition indices. */ diff --git a/stdlib/tst-qsort5.c b/stdlib/tst-qsort5.c new file mode 100644 index 0000000000..d3a88c30f8 --- /dev/null +++ b/stdlib/tst-qsort5.c @@ -0,0 +1,171 @@ +/* Adversarial test for qsort_r. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The approach follows Douglas McIlroy, A Killer Adversary for + Quicksort. Software—Practice and Experience 29 (1999) 341-344. + Downloaded <http://www.cs.dartmouth.edu/~doug/mdmspe.pdf> + (2023-11-17). */ + +#include <math.h> +#include <stdlib.h> +#include <stdio.h> +#include <support/check.h> +#include <support/support.h> + +struct context +{ + /* Called the gas value in the paper. This value is larger than all + other values (length minus one will do), so comparison with any + decided value has a known result. */ + int undecided_value; + + /* If comparing undecided values, one of them as to be assigned a + value to ensure consistency with future comparisons. This is the + value that will be used. Starts out at zero. */ + int next_decided; + + /* Used to trick pivot selection. Deciding the value for the last + seen undcided value in a decided/undecided comparison happens + to trick the many qsort implementations. */ + int last_undecided_index; + + /* This array contains the actually asigned values. The call to + qsort_r sorts a different array that contains indices into this + array. */ + int *decided_values; +}; + +static int +compare_opponent (const void *l1, const void *r1, void *ctx1) +{ + const int *l = l1; + const int *r = r1; + struct context *ctx = ctx1; + int rvalue = ctx->decided_values[*r]; + int lvalue = ctx->decided_values[*l]; + + if (lvalue == ctx->undecided_value) + { + if (rvalue == ctx->undecided_value) + { + /* Both values are undecided. In this case, make a decision + for the last-used undecided value. This is tweak is very + specific to quicksort. */ + if (*l == ctx->last_undecided_index) + { + ctx->decided_values[*l] = ctx->next_decided; + ++ctx->next_decided; + /* The undecided value or *r is greater. */ + return -1; + } + else + { + ctx->decided_values[*r] = ctx->next_decided; + ++ctx->next_decided; + /* The undecided value for *l is greater. */ + return 1; + } + } + else + { + ctx->last_undecided_index = *l; + return 1; + } + } + else + { + /* *l is a decided value. */ + if (rvalue == ctx->undecided_value) + { + ctx->last_undecided_index = *r; + /* The undecided value for *r is greater. */ + return -1; + } + else + return lvalue - rvalue; + } +} + +/* Return a pointer to the adversarial permutation of length N. */ +static int * +create_permutation (size_t n) +{ + struct context ctx = + { + .undecided_value = n - 1, /* Larger than all other values. */ + .decided_values = xcalloc (n, sizeof (int)), + }; + for (size_t i = 0; i < n; ++i) + ctx.decided_values[i] = ctx.undecided_value; + int *scratch = xcalloc (n, sizeof (int)); + for (size_t i = 0; i < n; ++i) + scratch[i] = i; + qsort_r (scratch, n, sizeof (*scratch), compare_opponent, &ctx); + free (scratch); + return ctx.decided_values; +} + +/* Callback function for qsort which counts the number of invocations + in *CLOSURE. */ +static int +compare_counter (const void *l1, const void *r1, void *closure) +{ + const int *l = l1; + const int *r = r1; + unsigned long long int *counter = closure; + ++*counter; + return *l - *r; +} + +/* Count the comparisons required for an adversarial permutation of + length N. */ +static unsigned long long int +count_comparisons (size_t n) +{ + int *array = create_permutation (n); + unsigned long long int counter = 0; + qsort_r (array, n, sizeof (*array), compare_counter, &counter); + free (array); + return counter; +} + +/* Check the scaling factor for one adversarial permutation of length + N, and report some statistics. */ +static void +check_one_n (size_t n) +{ + unsigned long long int count = count_comparisons (n); + double factor = count / (n * log (count)); + printf ("info: length %zu: %llu comparisons ~ %f * n * log (n)\n", + n, count, factor); + /* This is an arbitrary factor which is true for the current + implementation across a wide range of sizes. */ + TEST_VERIFY (factor <= 4.5); +} + +static int +do_test (void) +{ + check_one_n (100); + check_one_n (1000); + for (int i = 1; i <= 15; ++i) + check_one_n (i * 10 * 1000); + return 0; +} + +#include <support/test-driver.c>