Message ID | mvmin00844a.fsf@suse.de |
---|---|
State | New |
Headers | show |
Series | Fix handling of collating elements in fnmatch (bug 17396, bug 16976) | expand |
* Andreas Schwab: > This fixes the same bug in fnmatch that was fixed by commit 7e2f0d2d77 for > regexp matching. As a side effect it also removes the use of an unbound > VLA. > > [BZ #16976] > [BZ #17396] > * posix/fnmatch_loop.c (internal_fnmatch, internal_fnwmatch): When > looking up collating elements match against (wide) character > sequence instead of name. Correct alignment adjustment. > * posix/fnmatch.c: Don't include "../locale/elem-hash.h". > * posix/Makefile (tests): Add tst-fnmatch4 and tst-fnmatch5. > (LOCALES): Add cs_CZ.ISO-8859-2. > * posix/tst-fnmatch4.c: New file. > * posix/tst-fnmatch5.c: New file. > + /* Compare the wide char sequence. */ > + && memcmp (startp + 1, &wextra[1], > + c1 * sizeof (UCHAR)) == 0) Could this use wmemcmp? > + if (/* Compare the length of the > + sequence. */ > + c1 == wextra[0] > + /* Compare the wide char sequence. */ > + && memcmp (startp + 1, &wextra[1], > + c1 * sizeof (int32_t)) == 0) Likewise: Could this use wmemcmp? Rest of the patch looks fine to me, as far as I can judge such matters. Please note: The copyright year on the new files needs to be adjusted. Thanks, Florian
On Jan 07 2019, Florian Weimer <fweimer@redhat.com> wrote: >> + /* Compare the wide char sequence. */ >> + && memcmp (startp + 1, &wextra[1], >> + c1 * sizeof (UCHAR)) == 0) > > Could this use wmemcmp? That required adding an internal alias. Andreas. [BZ #16976] [BZ #17396] * posix/fnmatch_loop.c (internal_fnmatch, internal_fnwmatch): When looking up collating elements match against (wide) character sequence instead of name. Correct alignment adjustment. * posix/fnmatch.c: Don't include "../locale/elem-hash.h". (WMEMCMP) [HANDLE_MULTIBYTE]: Define. * posix/Makefile (tests): Add tst-fnmatch4 and tst-fnmatch5. (LOCALES): Add cs_CZ.ISO-8859-2. * posix/tst-fnmatch4.c: New file. * posix/tst-fnmatch5.c: New file. * include/wchar.h (__wmemcmp): Declare. * wcsmbs/wmemcmp.c: Define __wmemcmp and add wmemcmp as weak alias. * sysdeps/i386/i686/multiarch/wmemcmp.c: Likewise. * sysdeps/x86_64/multiarch/wmemcmp.c: Likewise. * sysdeps/s390/wmemcmp.c: Likewise. --- include/wchar.h | 2 + posix/Makefile | 4 +- posix/fnmatch.c | 6 +- posix/fnmatch_loop.c | 228 +++++++----------- .../s390/wmemcmp.c => posix/tst-fnmatch4.c | 56 +++-- posix/tst-fnmatch5.c | 52 ++++ sysdeps/i386/i686/multiarch/wmemcmp.c | 3 +- sysdeps/s390/wmemcmp.c | 7 +- sysdeps/x86_64/multiarch/wmemcmp.c | 3 +- wcsmbs/wmemcmp.c | 9 +- 10 files changed, 197 insertions(+), 173 deletions(-) copy sysdeps/s390/wmemcmp.c => posix/tst-fnmatch4.c (52%) create mode 100644 posix/tst-fnmatch5.c diff --git a/include/wchar.h b/include/wchar.h index 86506d28e9..614073bcb3 100644 --- a/include/wchar.h +++ b/include/wchar.h @@ -143,6 +143,8 @@ libc_hidden_proto (wmemchr) libc_hidden_proto (__wmemchr) libc_hidden_proto (wmemset) libc_hidden_proto (__wmemset) +extern int __wmemcmp (const wchar_t *__s1, const wchar_t *__s2, size_t __n) + __THROW __attribute_pure__; /* Now define the internal interfaces. 
*/ extern int __wcscasecmp (const wchar_t *__s1, const wchar_t *__s2) diff --git a/posix/Makefile b/posix/Makefile index cfd914ff21..873947f72e 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -93,6 +93,7 @@ tests := test-errno tstgetopt testfnm runtests runptests \ bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \ tst-pathconf tst-rxspencer-no-utf8 \ tst-fnmatch3 bug-regex36 \ + tst-fnmatch4 tst-fnmatch5 \ tst-posix_spawn-fd tst-posix_spawn-setsid \ tst-posix_fadvise tst-posix_fadvise64 \ tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \ @@ -168,7 +169,8 @@ $(objpfx)wordexp-tst.out: wordexp-tst.sh $(objpfx)wordexp-test endif LOCALES := cs_CZ.UTF-8 da_DK.ISO-8859-1 de_DE.ISO-8859-1 de_DE.UTF-8 \ - en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 + en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 \ + cs_CZ.ISO-8859-2 include ../gen-locales.mk $(objpfx)bug-regex1.out: $(gen-locales) diff --git a/posix/fnmatch.c b/posix/fnmatch.c index 7b225cf2ba..a58e1743ce 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -53,7 +53,6 @@ we support a correct implementation only in glibc. */ #ifdef _LIBC # include "../locale/localeinfo.h" -# include "../locale/elem-hash.h" # include "../locale/coll-lookup.h" # include <shlib-compat.h> @@ -237,6 +236,11 @@ __wcschrnul (const wchar_t *s, wint_t c) # define MEMPCPY(D, S, N) __wmempcpy (D, S, N) # define MEMCHR(S, C, N) __wmemchr (S, C, N) # define STRCOLL(S1, S2) wcscoll (S1, S2) +# ifdef _LIBC +# define WMEMCMP(S1, S2, N) __wmemcmp (S1, S2, N) +# else +# define WMEMCMP(S1, S2, N) wmemcmp (S1, S2, N) +# endif # define WIDE_CHAR_VERSION 1 /* Change the name the header defines so it doesn't conflict with the <locale/weight.h> version included above. 
*/ diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c index f888c66dfb..fa39b21395 100644 --- a/posix/fnmatch_loop.c +++ b/posix/fnmatch_loop.c @@ -494,26 +494,12 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, { int32_t table_size; const int32_t *symb_table; -# if WIDE_CHAR_VERSION - char str[c1]; - unsigned int strcnt; -# else -# define str (startp + 1) -# endif const unsigned char *extra; int32_t idx; int32_t elem; - int32_t second; - int32_t hash; - # if WIDE_CHAR_VERSION - /* We have to convert the name to a single-byte - string. This is possible since the names - consist of ASCII characters and the internal - representation is UCS4. */ - for (strcnt = 0; strcnt < c1; ++strcnt) - str[strcnt] = startp[1 + strcnt]; -#endif + CHAR *wextra; +# endif table_size = _NL_CURRENT_WORD (LC_COLLATE, @@ -525,71 +511,54 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - /* Locate the character in the hashing table. */ - hash = elem_hash (str, c1); - - idx = 0; - elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - && (c1 - == extra[symb_table[2 * elem + 1]]) - && memcmp (str, - &extra[symb_table[2 * elem - + 1] - + 1], c1) == 0) - { - /* Yep, this is the entry. */ - idx = symb_table[2 * elem + 1]; - idx += 1 + extra[idx]; - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } + for (elem = 0; elem < table_size; elem++) + if (symb_table[2 * elem] != 0) + { + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element. */ + idx += 1 + extra[idx]; +# if WIDE_CHAR_VERSION + /* Skip the byte sequence of the + collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. 
*/ + idx = (idx + 3) & ~3; + + wextra = (CHAR *) &extra[idx + 4]; + + if (/* Compare the length of the sequence. */ + c1 == wextra[0] + /* Compare the wide char sequence. */ + && WMEMCMP (startp + 1, &wextra[1], + c1) == 0) + /* Yep, this is the entry. */ + break; +# else + if (/* Compare the length of the sequence. */ + c1 == extra[idx] + /* Compare the byte sequence. */ + && memcmp (startp + 1, + &extra[idx + 1], c1) == 0) + /* Yep, this is the entry. */ + break; +# endif + } - if (symb_table[2 * elem] != 0) + if (elem < table_size) { /* Compare the byte sequence but only if this is not part of a range. */ -# if WIDE_CHAR_VERSION - int32_t *wextra; + if (! is_range - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~3; - - wextra = (int32_t *) &extra[idx + 4]; -# endif - - if (! is_range) - { # if WIDE_CHAR_VERSION - for (c1 = 0; - (int32_t) c1 < wextra[idx]; - ++c1) - if (n[c1] != wextra[1 + c1]) - break; - - if ((int32_t) c1 == wextra[idx]) - goto matched; + && WMEMCMP (n, &wextra[1], c1) == 0 # else - for (c1 = 0; c1 < extra[idx]; ++c1) - if (n[c1] != extra[1 + c1]) - break; - - if (c1 == extra[idx]) - goto matched; + && memcmp (n, &extra[idx + 1], c1) == 0 # endif + ) + { + n += c1 - 1; + goto matched; } /* Get the collation sequence value. */ @@ -597,9 +566,9 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, # if WIDE_CHAR_VERSION cold = wextra[1 + wextra[idx]]; # else - /* Adjust for the alignment. */ idx += 1 + extra[idx]; - idx = (idx + 3) & ~4; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; cold = *((int32_t *) &extra[idx]); # endif @@ -609,10 +578,10 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, { /* No valid character. Match it as a single byte. 
*/ - if (!is_range && *n == str[0]) + if (!is_range && *n == startp[1]) goto matched; - cold = str[0]; + cold = startp[1]; c = *p++; } else @@ -620,7 +589,6 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, } } else -# undef str #endif { c = FOLD (c); @@ -712,25 +680,11 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, { int32_t table_size; const int32_t *symb_table; -# if WIDE_CHAR_VERSION - char str[c1]; - unsigned int strcnt; -# else -# define str (startp + 1) -# endif const unsigned char *extra; int32_t idx; int32_t elem; - int32_t second; - int32_t hash; - # if WIDE_CHAR_VERSION - /* We have to convert the name to a single-byte - string. This is possible since the names - consist of ASCII characters and the internal - representation is UCS4. */ - for (strcnt = 0; strcnt < c1; ++strcnt) - str[strcnt] = startp[1 + strcnt]; + CHAR *wextra; # endif table_size = @@ -743,71 +697,63 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - /* Locate the character in the hashing - table. */ - hash = elem_hash (str, c1); - - idx = 0; - elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - && (c1 - == extra[symb_table[2 * elem + 1]]) - && memcmp (str, - &extra[symb_table[2 * elem + 1] - + 1], c1) == 0) - { - /* Yep, this is the entry. */ - idx = symb_table[2 * elem + 1]; - idx += 1 + extra[idx]; - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - - if (symb_table[2 * elem] != 0) - { - /* Compare the byte sequence but only if - this is not part of a range. */ + for (elem = 0; elem < table_size; elem++) + if (symb_table[2 * elem] != 0) + { + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating + element. 
*/ + idx += 1 + extra[idx]; # if WIDE_CHAR_VERSION - int32_t *wextra; - - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~4; - - wextra = (int32_t *) &extra[idx + 4]; + /* Skip the byte sequence of the + collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + + wextra = (CHAR *) &extra[idx + 4]; + + if (/* Compare the length of the + sequence. */ + c1 == wextra[0] + /* Compare the wide char sequence. */ + && WMEMCMP (startp + 1, &wextra[1], + c1) == 0) + /* Yep, this is the entry. */ + break; +# else + if (/* Compare the length of the + sequence. */ + c1 == extra[idx] + /* Compare the byte sequence. */ + && memcmp (startp + 1, + &extra[idx + 1], c1) == 0) + /* Yep, this is the entry. */ + break; # endif + } + + if (elem < table_size) + { /* Get the collation sequence value. */ is_seqval = 1; # if WIDE_CHAR_VERSION cend = wextra[1 + wextra[idx]]; # else - /* Adjust for the alignment. */ idx += 1 + extra[idx]; - idx = (idx + 3) & ~4; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; cend = *((int32_t *) &extra[idx]); # endif } - else if (symb_table[2 * elem] != 0 && c1 == 1) + else if (c1 == 1) { - cend = str[0]; + cend = startp[1]; c = *p++; } else return FNM_NOMATCH; } -# undef str } else { diff --git a/sysdeps/s390/wmemcmp.c b/posix/tst-fnmatch4.c similarity index 52% copy from sysdeps/s390/wmemcmp.c copy to posix/tst-fnmatch4.c index ec0b4027f8..dc13f89288 100644 --- a/sysdeps/s390/wmemcmp.c +++ b/posix/tst-fnmatch4.c @@ -1,5 +1,5 @@ -/* Multiple versions of wmemcmp. - Copyright (C) 2015-2019 Free Software Foundation, Inc. +/* Test for fnmatch handling of collating elements + Copyright (C) 2019 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,23 +16,35 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <ifunc-wmemcmp.h> - -#if HAVE_WMEMCMP_IFUNC -# include <wchar.h> -# include <ifunc-resolve.h> - -# if HAVE_WMEMCMP_C -extern __typeof (wmemcmp) WMEMCMP_C attribute_hidden; -# endif - -# if HAVE_WMEMCMP_Z13 -extern __typeof (wmemcmp) WMEMCMP_Z13 attribute_hidden; -# endif - -s390_libc_ifunc_expr (wmemcmp, wmemcmp, - (HAVE_WMEMCMP_Z13 && (hwcap & HWCAP_S390_VX)) - ? WMEMCMP_Z13 - : WMEMCMP_DEFAULT - ) -#endif +#include <stdio.h> +#include <locale.h> +#include <fnmatch.h> + +static int +do_test_locale (const char *locale) +{ + const char *pattern = "[[.ch.]]"; + + if (setlocale (LC_ALL, locale) == NULL) + { + printf ("could not set locale %s\n", locale); + return 1; + } + + if (fnmatch (pattern, "ch", 0) != 0) + { + printf ("%s didn't match in locale %s\n", pattern, locale); + return 1; + } + + return 0; +} + +static int +do_test (void) +{ + return (do_test_locale ("cs_CZ.ISO-8859-2") + || do_test_locale ("cs_CZ.UTF-8")); +} + +#include <support/test-driver.c> diff --git a/posix/tst-fnmatch5.c b/posix/tst-fnmatch5.c new file mode 100644 index 0000000000..5ebff1f424 --- /dev/null +++ b/posix/tst-fnmatch5.c @@ -0,0 +1,52 @@ +/* Test for fnmatch handling of collating elements + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fnmatch.h> +#include <locale.h> +#include <stdio.h> +#include <string.h> + +#define LENGTH 20000000 + +static char pattern[LENGTH + 7]; + +static int +do_test (void) +{ + if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) + { + puts ("could not set locale"); + return 1; + } + pattern[0] = '['; + pattern[1] = '['; + pattern[2] = '.'; + memset (pattern + 3, 'a', LENGTH); + pattern[LENGTH + 3] = '.'; + pattern[LENGTH + 4] = ']'; + pattern[LENGTH + 5] = ']'; + int ret = fnmatch (pattern, "a", 0); + if (ret == 0) + { + puts ("fnmatch returned 0 for invalid pattern"); + return 1; + } + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.c b/sysdeps/i386/i686/multiarch/wmemcmp.c index ce25991352..7674530c68 100644 --- a/sysdeps/i386/i686/multiarch/wmemcmp.c +++ b/sysdeps/i386/i686/multiarch/wmemcmp.c @@ -26,5 +26,6 @@ # define SYMBOL_NAME wmemcmp # include "ifunc-ssse3-sse4_2.h" -libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ()); +libc_ifunc_redirected (__redirect_wmemcmp, __wmemcmp, IFUNC_SELECTOR ()); +weak_alias (__wmemcmp, wmemcmp) #endif diff --git a/sysdeps/s390/wmemcmp.c b/sysdeps/s390/wmemcmp.c index ec0b4027f8..2f619550a6 100644 --- a/sysdeps/s390/wmemcmp.c +++ b/sysdeps/s390/wmemcmp.c @@ -23,16 +23,17 @@ # include <ifunc-resolve.h> # if HAVE_WMEMCMP_C -extern __typeof (wmemcmp) WMEMCMP_C attribute_hidden; +extern __typeof (__wmemcmp) WMEMCMP_C attribute_hidden; # endif # if HAVE_WMEMCMP_Z13 -extern __typeof (wmemcmp) WMEMCMP_Z13 attribute_hidden; +extern __typeof (__wmemcmp) WMEMCMP_Z13 attribute_hidden; # endif -s390_libc_ifunc_expr (wmemcmp, wmemcmp, +s390_libc_ifunc_expr (__wmemcmp, __wmemcmp, (HAVE_WMEMCMP_Z13 && (hwcap & HWCAP_S390_VX)) ? 
WMEMCMP_Z13 : WMEMCMP_DEFAULT ) +weak_alias (__wmemcmp, wmemcmp) #endif diff --git a/sysdeps/x86_64/multiarch/wmemcmp.c b/sysdeps/x86_64/multiarch/wmemcmp.c index 136a7b05e1..826c90bb77 100644 --- a/sysdeps/x86_64/multiarch/wmemcmp.c +++ b/sysdeps/x86_64/multiarch/wmemcmp.c @@ -26,5 +26,6 @@ # define SYMBOL_NAME wmemcmp # include "ifunc-memcmp.h" -libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ()); +libc_ifunc_redirected (__redirect_wmemcmp, __wmemcmp, IFUNC_SELECTOR ()); +weak_alias (__wmemcmp, wmemcmp) #endif diff --git a/wcsmbs/wmemcmp.c b/wcsmbs/wmemcmp.c index 5b243bab8f..5e137fd3dd 100644 --- a/wcsmbs/wmemcmp.c +++ b/wcsmbs/wmemcmp.c @@ -18,12 +18,12 @@ #include <wchar.h> -#ifndef WMEMCMP -# define WMEMCMP wmemcmp +#ifdef WMEMCMP +# define __wmemcmp WMEMCMP #endif int -WMEMCMP (const wchar_t *s1, const wchar_t *s2, size_t n) +__wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n) { wchar_t c1; wchar_t c2; @@ -81,3 +81,6 @@ WMEMCMP (const wchar_t *s1, const wchar_t *s2, size_t n) return 0; } +#ifndef WMEMCMP +weak_alias (__wmemcmp, wmemcmp) +#endif
On 09/01/2019 11:20, Andreas Schwab wrote: > diff --git a/sysdeps/s390/wmemcmp.c b/posix/tst-fnmatch4.c > similarity index 52% > copy from sysdeps/s390/wmemcmp.c > copy to posix/tst-fnmatch4.c > index ec0b4027f8..dc13f89288 100644 > --- a/sysdeps/s390/wmemcmp.c > +++ b/posix/tst-fnmatch4.c > @@ -1,5 +1,5 @@ > -/* Multiple versions of wmemcmp. > - Copyright (C) 2015-2019 Free Software Foundation, Inc. > +/* Test for fnmatch handling of collating elements > + Copyright (C) 2019 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > The GNU C Library is free software; you can redistribute it and/or > @@ -16,23 +16,35 @@ > License along with the GNU C Library; if not, see > <http://www.gnu.org/licenses/>. */ > > -#include <ifunc-wmemcmp.h> > - > -#if HAVE_WMEMCMP_IFUNC > -# include <wchar.h> > -# include <ifunc-resolve.h> > - > -# if HAVE_WMEMCMP_C > -extern __typeof (wmemcmp) WMEMCMP_C attribute_hidden; > -# endif > - > -# if HAVE_WMEMCMP_Z13 > -extern __typeof (wmemcmp) WMEMCMP_Z13 attribute_hidden; > -# endif > - > -s390_libc_ifunc_expr (wmemcmp, wmemcmp, > - (HAVE_WMEMCMP_Z13 && (hwcap & HWCAP_S390_VX)) > - ? WMEMCMP_Z13 > - : WMEMCMP_DEFAULT > - ) > -#endif > +#include <stdio.h> > +#include <locale.h> > +#include <fnmatch.h> > + > +static int > +do_test_locale (const char *locale) > +{ > + const char *pattern = "[[.ch.]]"; > + > + if (setlocale (LC_ALL, locale) == NULL) > + { > + printf ("could not set locale %s\n", locale); > + return 1; > + } > + > + if (fnmatch (pattern, "ch", 0) != 0) > + { > + printf ("%s didn't match in locale %s\n", pattern, locale); > + return 1; > + } > + > + return 0; > +} Maybe TEST_COMPARE on both check? 
> + > +static int > +do_test (void) > +{ > + return (do_test_locale ("cs_CZ.ISO-8859-2") > + || do_test_locale ("cs_CZ.UTF-8")); > +} > + > +#include <support/test-driver.c> > diff --git a/posix/tst-fnmatch5.c b/posix/tst-fnmatch5.c > new file mode 100644 > index 0000000000..5ebff1f424 > --- /dev/null > +++ b/posix/tst-fnmatch5.c > @@ -0,0 +1,52 @@ > +/* Test for fnmatch handling of collating elements > + Copyright (C) 2019 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#include <fnmatch.h> > +#include <locale.h> > +#include <stdio.h> > +#include <string.h> > + > +#define LENGTH 20000000 > + > +static char pattern[LENGTH + 7]; > + > +static int > +do_test (void) > +{ > + if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) > + { > + puts ("could not set locale"); > + return 1; > + } > + pattern[0] = '['; > + pattern[1] = '['; > + pattern[2] = '.'; > + memset (pattern + 3, 'a', LENGTH); > + pattern[LENGTH + 3] = '.'; > + pattern[LENGTH + 4] = ']'; > + pattern[LENGTH + 5] = ']'; > + int ret = fnmatch (pattern, "a", 0); > + if (ret == 0) > + { > + puts ("fnmatch returned 0 for invalid pattern"); > + return 1; > + } > + return 0; > +} > + > +#include <support/test-driver.c> Same as before.
* Andreas Schwab: > On Jan 07 2019, Florian Weimer <fweimer@redhat.com> wrote: > >>> + /* Compare the wide char sequence. */ >>> + && memcmp (startp + 1, &wextra[1], >>> + c1 * sizeof (UCHAR)) == 0) >> >> Could this use wmemcmp? > > That required adding an internal alias. Oh, sorry, I didn't realize we were missing that. I believe that due to the freeze, we should go with the original patch then. Thanks, Florian
On Jan 09 2019, Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote:
> Maybe use TEST_COMPARE for both checks?
I would rather use TEST_VERIFY or TEST_VERIFY_EXIT.
Andreas.
[BZ #16976]
[BZ #17396]
* posix/fnmatch_loop.c (internal_fnmatch, internal_fnwmatch): When
looking up collating elements, match against the (wide) character
sequence instead of the name. Correct alignment adjustment.
* posix/fnmatch.c: Don't include "../locale/elem-hash.h".
(WMEMCMP) [HANDLE_MULTIBYTE]: Define.
* posix/Makefile (tests): Add tst-fnmatch4 and tst-fnmatch5.
(LOCALES): Add cs_CZ.ISO-8859-2.
* posix/tst-fnmatch4.c: New file.
* posix/tst-fnmatch5.c: New file.
* include/wchar.h (__wmemcmp): Declare.
* wcsmbs/wmemcmp.c: Define __wmemcmp and add wmemcmp as weak alias.
* sysdeps/i386/i686/multiarch/wmemcmp.c: Likewise.
* sysdeps/x86_64/multiarch/wmemcmp.c: Likewise.
* sysdeps/s390/wmemcmp.c: Likewise.
---
include/wchar.h | 2 +
posix/Makefile | 4 +-
posix/fnmatch.c | 6 +-
posix/fnmatch_loop.c | 228 +++++++-----------
.../wmemcmp.c => posix/tst-fnmatch4.c | 35 ++-
.../s390/wmemcmp.c => posix/tst-fnmatch5.c | 44 ++--
sysdeps/i386/i686/multiarch/wmemcmp.c | 3 +-
sysdeps/s390/wmemcmp.c | 7 +-
sysdeps/x86_64/multiarch/wmemcmp.c | 3 +-
wcsmbs/wmemcmp.c | 9 +-
10 files changed, 160 insertions(+), 181 deletions(-)
copy sysdeps/i386/i686/multiarch/wmemcmp.c => posix/tst-fnmatch4.c (59%)
copy sysdeps/s390/wmemcmp.c => posix/tst-fnmatch5.c (53%)
diff --git a/include/wchar.h b/include/wchar.h
index 86506d28e9..614073bcb3 100644
--- a/include/wchar.h
+++ b/include/wchar.h
@@ -143,6 +143,8 @@ libc_hidden_proto (wmemchr)
libc_hidden_proto (__wmemchr)
libc_hidden_proto (wmemset)
libc_hidden_proto (__wmemset)
+extern int __wmemcmp (const wchar_t *__s1, const wchar_t *__s2, size_t __n)
+ __THROW __attribute_pure__;
/* Now define the internal interfaces. */
extern int __wcscasecmp (const wchar_t *__s1, const wchar_t *__s2)
diff --git a/posix/Makefile b/posix/Makefile
index cfd914ff21..873947f72e 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -93,6 +93,7 @@ tests := test-errno tstgetopt testfnm runtests runptests \
bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \
tst-pathconf tst-rxspencer-no-utf8 \
tst-fnmatch3 bug-regex36 \
+ tst-fnmatch4 tst-fnmatch5 \
tst-posix_spawn-fd tst-posix_spawn-setsid \
tst-posix_fadvise tst-posix_fadvise64 \
tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \
@@ -168,7 +169,8 @@ $(objpfx)wordexp-tst.out: wordexp-tst.sh $(objpfx)wordexp-test
endif
LOCALES := cs_CZ.UTF-8 da_DK.ISO-8859-1 de_DE.ISO-8859-1 de_DE.UTF-8 \
- en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8
+ en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 \
+ cs_CZ.ISO-8859-2
include ../gen-locales.mk
$(objpfx)bug-regex1.out: $(gen-locales)
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 7b225cf2ba..a58e1743ce 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -53,7 +53,6 @@
we support a correct implementation only in glibc. */
#ifdef _LIBC
# include "../locale/localeinfo.h"
-# include "../locale/elem-hash.h"
# include "../locale/coll-lookup.h"
# include <shlib-compat.h>
@@ -237,6 +236,11 @@ __wcschrnul (const wchar_t *s, wint_t c)
# define MEMPCPY(D, S, N) __wmempcpy (D, S, N)
# define MEMCHR(S, C, N) __wmemchr (S, C, N)
# define STRCOLL(S1, S2) wcscoll (S1, S2)
+# ifdef _LIBC
+# define WMEMCMP(S1, S2, N) __wmemcmp (S1, S2, N)
+# else
+# define WMEMCMP(S1, S2, N) wmemcmp (S1, S2, N)
+# endif
# define WIDE_CHAR_VERSION 1
/* Change the name the header defines so it doesn't conflict with
the <locale/weight.h> version included above. */
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index f888c66dfb..fa39b21395 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -494,26 +494,12 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
{
int32_t table_size;
const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
- char str[c1];
- unsigned int strcnt;
-# else
-# define str (startp + 1)
-# endif
const unsigned char *extra;
int32_t idx;
int32_t elem;
- int32_t second;
- int32_t hash;
-
# if WIDE_CHAR_VERSION
- /* We have to convert the name to a single-byte
- string. This is possible since the names
- consist of ASCII characters and the internal
- representation is UCS4. */
- for (strcnt = 0; strcnt < c1; ++strcnt)
- str[strcnt] = startp[1 + strcnt];
-#endif
+ CHAR *wextra;
+# endif
table_size =
_NL_CURRENT_WORD (LC_COLLATE,
@@ -525,71 +511,54 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
_NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_EXTRAMB);
- /* Locate the character in the hashing table. */
- hash = elem_hash (str, c1);
-
- idx = 0;
- elem = hash % table_size;
- if (symb_table[2 * elem] != 0)
- {
- second = hash % (table_size - 2) + 1;
-
- do
- {
- /* First compare the hashing value. */
- if (symb_table[2 * elem] == hash
- && (c1
- == extra[symb_table[2 * elem + 1]])
- && memcmp (str,
- &extra[symb_table[2 * elem
- + 1]
- + 1], c1) == 0)
- {
- /* Yep, this is the entry. */
- idx = symb_table[2 * elem + 1];
- idx += 1 + extra[idx];
- break;
- }
-
- /* Next entry. */
- elem += second;
- }
- while (symb_table[2 * elem] != 0);
- }
+ for (elem = 0; elem < table_size; elem++)
+ if (symb_table[2 * elem] != 0)
+ {
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element. */
+ idx += 1 + extra[idx];
+# if WIDE_CHAR_VERSION
+ /* Skip the byte sequence of the
+ collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+
+ wextra = (CHAR *) &extra[idx + 4];
+
+ if (/* Compare the length of the sequence. */
+ c1 == wextra[0]
+ /* Compare the wide char sequence. */
+ && WMEMCMP (startp + 1, &wextra[1],
+ c1) == 0)
+ /* Yep, this is the entry. */
+ break;
+# else
+ if (/* Compare the length of the sequence. */
+ c1 == extra[idx]
+ /* Compare the byte sequence. */
+ && memcmp (startp + 1,
+ &extra[idx + 1], c1) == 0)
+ /* Yep, this is the entry. */
+ break;
+# endif
+ }
- if (symb_table[2 * elem] != 0)
+ if (elem < table_size)
{
/* Compare the byte sequence but only if
this is not part of a range. */
-# if WIDE_CHAR_VERSION
- int32_t *wextra;
+ if (! is_range
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~3;
-
- wextra = (int32_t *) &extra[idx + 4];
-# endif
-
- if (! is_range)
- {
# if WIDE_CHAR_VERSION
- for (c1 = 0;
- (int32_t) c1 < wextra[idx];
- ++c1)
- if (n[c1] != wextra[1 + c1])
- break;
-
- if ((int32_t) c1 == wextra[idx])
- goto matched;
+ && WMEMCMP (n, &wextra[1], c1) == 0
# else
- for (c1 = 0; c1 < extra[idx]; ++c1)
- if (n[c1] != extra[1 + c1])
- break;
-
- if (c1 == extra[idx])
- goto matched;
+ && memcmp (n, &extra[idx + 1], c1) == 0
# endif
+ )
+ {
+ n += c1 - 1;
+ goto matched;
}
/* Get the collation sequence value. */
@@ -597,9 +566,9 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
# if WIDE_CHAR_VERSION
cold = wextra[1 + wextra[idx]];
# else
- /* Adjust for the alignment. */
idx += 1 + extra[idx];
- idx = (idx + 3) & ~4;
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
cold = *((int32_t *) &extra[idx]);
# endif
@@ -609,10 +578,10 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
{
/* No valid character. Match it as a
single byte. */
- if (!is_range && *n == str[0])
+ if (!is_range && *n == startp[1])
goto matched;
- cold = str[0];
+ cold = startp[1];
c = *p++;
}
else
@@ -620,7 +589,6 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
}
}
else
-# undef str
#endif
{
c = FOLD (c);
@@ -712,25 +680,11 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
{
int32_t table_size;
const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
- char str[c1];
- unsigned int strcnt;
-# else
-# define str (startp + 1)
-# endif
const unsigned char *extra;
int32_t idx;
int32_t elem;
- int32_t second;
- int32_t hash;
-
# if WIDE_CHAR_VERSION
- /* We have to convert the name to a single-byte
- string. This is possible since the names
- consist of ASCII characters and the internal
- representation is UCS4. */
- for (strcnt = 0; strcnt < c1; ++strcnt)
- str[strcnt] = startp[1 + strcnt];
+ CHAR *wextra;
# endif
table_size =
@@ -743,71 +697,63 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
_NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_EXTRAMB);
- /* Locate the character in the hashing
- table. */
- hash = elem_hash (str, c1);
-
- idx = 0;
- elem = hash % table_size;
- if (symb_table[2 * elem] != 0)
- {
- second = hash % (table_size - 2) + 1;
-
- do
- {
- /* First compare the hashing value. */
- if (symb_table[2 * elem] == hash
- && (c1
- == extra[symb_table[2 * elem + 1]])
- && memcmp (str,
- &extra[symb_table[2 * elem + 1]
- + 1], c1) == 0)
- {
- /* Yep, this is the entry. */
- idx = symb_table[2 * elem + 1];
- idx += 1 + extra[idx];
- break;
- }
-
- /* Next entry. */
- elem += second;
- }
- while (symb_table[2 * elem] != 0);
- }
-
- if (symb_table[2 * elem] != 0)
- {
- /* Compare the byte sequence but only if
- this is not part of a range. */
+ for (elem = 0; elem < table_size; elem++)
+ if (symb_table[2 * elem] != 0)
+ {
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating
+ element. */
+ idx += 1 + extra[idx];
# if WIDE_CHAR_VERSION
- int32_t *wextra;
-
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~4;
-
- wextra = (int32_t *) &extra[idx + 4];
+ /* Skip the byte sequence of the
+ collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+
+ wextra = (CHAR *) &extra[idx + 4];
+
+ if (/* Compare the length of the
+ sequence. */
+ c1 == wextra[0]
+ /* Compare the wide char sequence. */
+ && WMEMCMP (startp + 1, &wextra[1],
+ c1) == 0)
+ /* Yep, this is the entry. */
+ break;
+# else
+ if (/* Compare the length of the
+ sequence. */
+ c1 == extra[idx]
+ /* Compare the byte sequence. */
+ && memcmp (startp + 1,
+ &extra[idx + 1], c1) == 0)
+ /* Yep, this is the entry. */
+ break;
# endif
+ }
+
+ if (elem < table_size)
+ {
/* Get the collation sequence value. */
is_seqval = 1;
# if WIDE_CHAR_VERSION
cend = wextra[1 + wextra[idx]];
# else
- /* Adjust for the alignment. */
idx += 1 + extra[idx];
- idx = (idx + 3) & ~4;
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
cend = *((int32_t *) &extra[idx]);
# endif
}
- else if (symb_table[2 * elem] != 0 && c1 == 1)
+ else if (c1 == 1)
{
- cend = str[0];
+ cend = startp[1];
c = *p++;
}
else
return FNM_NOMATCH;
}
-# undef str
}
else
{
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.c b/posix/tst-fnmatch4.c
similarity index 59%
copy from sysdeps/i386/i686/multiarch/wmemcmp.c
copy to posix/tst-fnmatch4.c
index ce25991352..370265ddf0 100644
--- a/sysdeps/i386/i686/multiarch/wmemcmp.c
+++ b/posix/tst-fnmatch4.c
@@ -1,6 +1,5 @@
-/* Multiple versions of wmemcmp.
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2017-2019 Free Software Foundation, Inc.
+/* Test for fnmatch handling of collating elements
+ Copyright (C) 2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,14 +16,26 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
-# define wmemcmp __redirect_wmemcmp
-# include <wchar.h>
-# undef wmemcmp
+#include <stdio.h>
+#include <locale.h>
+#include <fnmatch.h>
+#include <support/check.h>
-# define SYMBOL_NAME wmemcmp
-# include "ifunc-ssse3-sse4_2.h"
+static void
+do_test_locale (const char *locale)
+{
+ TEST_VERIFY_EXIT (setlocale (LC_ALL, locale) != NULL);
-libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ());
-#endif
+ TEST_VERIFY (fnmatch ("[[.ch.]]", "ch", 0) == 0);
+}
+
+static int
+do_test (void)
+{
+ do_test_locale ("cs_CZ.ISO-8859-2");
+ do_test_locale ("cs_CZ.UTF-8");
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/s390/wmemcmp.c b/posix/tst-fnmatch5.c
similarity index 53%
copy from sysdeps/s390/wmemcmp.c
copy to posix/tst-fnmatch5.c
index ec0b4027f8..241371c752 100644
--- a/sysdeps/s390/wmemcmp.c
+++ b/posix/tst-fnmatch5.c
@@ -1,5 +1,5 @@
-/* Multiple versions of wmemcmp.
- Copyright (C) 2015-2019 Free Software Foundation, Inc.
+/* Test for fnmatch handling of collating elements
+ Copyright (C) 2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,23 +16,31 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <ifunc-wmemcmp.h>
+#include <fnmatch.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
-#if HAVE_WMEMCMP_IFUNC
-# include <wchar.h>
-# include <ifunc-resolve.h>
+#define LENGTH 20000000
-# if HAVE_WMEMCMP_C
-extern __typeof (wmemcmp) WMEMCMP_C attribute_hidden;
-# endif
+static char pattern[LENGTH + 7];
-# if HAVE_WMEMCMP_Z13
-extern __typeof (wmemcmp) WMEMCMP_Z13 attribute_hidden;
-# endif
+static int
+do_test (void)
+{
+ TEST_VERIFY_EXIT (setlocale (LC_ALL, "en_US.UTF-8") != NULL);
-s390_libc_ifunc_expr (wmemcmp, wmemcmp,
- (HAVE_WMEMCMP_Z13 && (hwcap & HWCAP_S390_VX))
- ? WMEMCMP_Z13
- : WMEMCMP_DEFAULT
- )
-#endif
+ pattern[0] = '[';
+ pattern[1] = '[';
+ pattern[2] = '.';
+ memset (pattern + 3, 'a', LENGTH);
+ pattern[LENGTH + 3] = '.';
+ pattern[LENGTH + 4] = ']';
+ pattern[LENGTH + 5] = ']';
+ TEST_VERIFY (fnmatch (pattern, "a", 0) != 0);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.c b/sysdeps/i386/i686/multiarch/wmemcmp.c
index ce25991352..7674530c68 100644
--- a/sysdeps/i386/i686/multiarch/wmemcmp.c
+++ b/sysdeps/i386/i686/multiarch/wmemcmp.c
@@ -26,5 +26,6 @@
# define SYMBOL_NAME wmemcmp
# include "ifunc-ssse3-sse4_2.h"
-libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ());
+libc_ifunc_redirected (__redirect_wmemcmp, __wmemcmp, IFUNC_SELECTOR ());
+weak_alias (__wmemcmp, wmemcmp)
#endif
diff --git a/sysdeps/s390/wmemcmp.c b/sysdeps/s390/wmemcmp.c
index ec0b4027f8..2f619550a6 100644
--- a/sysdeps/s390/wmemcmp.c
+++ b/sysdeps/s390/wmemcmp.c
@@ -23,16 +23,17 @@
# include <ifunc-resolve.h>
# if HAVE_WMEMCMP_C
-extern __typeof (wmemcmp) WMEMCMP_C attribute_hidden;
+extern __typeof (__wmemcmp) WMEMCMP_C attribute_hidden;
# endif
# if HAVE_WMEMCMP_Z13
-extern __typeof (wmemcmp) WMEMCMP_Z13 attribute_hidden;
+extern __typeof (__wmemcmp) WMEMCMP_Z13 attribute_hidden;
# endif
-s390_libc_ifunc_expr (wmemcmp, wmemcmp,
+s390_libc_ifunc_expr (__wmemcmp, __wmemcmp,
(HAVE_WMEMCMP_Z13 && (hwcap & HWCAP_S390_VX))
? WMEMCMP_Z13
: WMEMCMP_DEFAULT
)
+weak_alias (__wmemcmp, wmemcmp)
#endif
diff --git a/sysdeps/x86_64/multiarch/wmemcmp.c b/sysdeps/x86_64/multiarch/wmemcmp.c
index 136a7b05e1..826c90bb77 100644
--- a/sysdeps/x86_64/multiarch/wmemcmp.c
+++ b/sysdeps/x86_64/multiarch/wmemcmp.c
@@ -26,5 +26,6 @@
# define SYMBOL_NAME wmemcmp
# include "ifunc-memcmp.h"
-libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ());
+libc_ifunc_redirected (__redirect_wmemcmp, __wmemcmp, IFUNC_SELECTOR ());
+weak_alias (__wmemcmp, wmemcmp)
#endif
diff --git a/wcsmbs/wmemcmp.c b/wcsmbs/wmemcmp.c
index 5b243bab8f..5e137fd3dd 100644
--- a/wcsmbs/wmemcmp.c
+++ b/wcsmbs/wmemcmp.c
@@ -18,12 +18,12 @@
#include <wchar.h>
-#ifndef WMEMCMP
-# define WMEMCMP wmemcmp
+#ifdef WMEMCMP
+# define __wmemcmp WMEMCMP
#endif
int
-WMEMCMP (const wchar_t *s1, const wchar_t *s2, size_t n)
+__wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n)
{
wchar_t c1;
wchar_t c2;
@@ -81,3 +81,6 @@ WMEMCMP (const wchar_t *s1, const wchar_t *s2, size_t n)
return 0;
}
+#ifndef WMEMCMP
+weak_alias (__wmemcmp, wmemcmp)
+#endif
On 09/01/2019 12:31, Andreas Schwab wrote: > diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.c b/posix/tst-fnmatch4.c > similarity index 59% > copy from sysdeps/i386/i686/multiarch/wmemcmp.c > copy to posix/tst-fnmatch4.c > index ce25991352..370265ddf0 100644 > --- a/sysdeps/i386/i686/multiarch/wmemcmp.c > +++ b/posix/tst-fnmatch4.c > @@ -1,6 +1,5 @@ > -/* Multiple versions of wmemcmp. > - All versions must be listed in ifunc-impl-list.c. > - Copyright (C) 2017-2019 Free Software Foundation, Inc. I think something is wrong with git format-patch here.
On Jan 09 2019, Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > On 09/01/2019 12:31, Andreas Schwab wrote: >> diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.c b/posix/tst-fnmatch4.c >> similarity index 59% >> copy from sysdeps/i386/i686/multiarch/wmemcmp.c >> copy to posix/tst-fnmatch4.c >> index ce25991352..370265ddf0 100644 >> --- a/sysdeps/i386/i686/multiarch/wmemcmp.c >> +++ b/posix/tst-fnmatch4.c >> @@ -1,6 +1,5 @@ >> -/* Multiple versions of wmemcmp. >> - All versions must be listed in ifunc-impl-list.c. >> - Copyright (C) 2017-2019 Free Software Foundation, Inc. > > I think something is wrong with git format-patch here. In which way? Andreas.
On 10/01/2019 06:43, Andreas Schwab wrote: > On Jan 09 2019, Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > >> On 09/01/2019 12:31, Andreas Schwab wrote: >>> diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.c b/posix/tst-fnmatch4.c >>> similarity index 59% >>> copy from sysdeps/i386/i686/multiarch/wmemcmp.c >>> copy to posix/tst-fnmatch4.c >>> index ce25991352..370265ddf0 100644 >>> --- a/sysdeps/i386/i686/multiarch/wmemcmp.c >>> +++ b/posix/tst-fnmatch4.c >>> @@ -1,6 +1,5 @@ >>> -/* Multiple versions of wmemcmp. >>> - All versions must be listed in ifunc-impl-list.c. >>> - Copyright (C) 2017-2019 Free Software Foundation, Inc. >> >> I think something is wrong with git format-patch here. > > In which way? It is kind of hard to evaluate the changes when the diff is against unrelated files.
On Jan 10 2019, Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > On 10/01/2019 06:43, Andreas Schwab wrote: >> On Jan 09 2019, Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: >> >>> On 09/01/2019 12:31, Andreas Schwab wrote: >>>> diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.c b/posix/tst-fnmatch4.c >>>> similarity index 59% >>>> copy from sysdeps/i386/i686/multiarch/wmemcmp.c >>>> copy to posix/tst-fnmatch4.c >>>> index ce25991352..370265ddf0 100644 >>>> --- a/sysdeps/i386/i686/multiarch/wmemcmp.c >>>> +++ b/posix/tst-fnmatch4.c >>>> @@ -1,6 +1,5 @@ >>>> -/* Multiple versions of wmemcmp. >>>> - All versions must be listed in ifunc-impl-list.c. >>>> - Copyright (C) 2017-2019 Free Software Foundation, Inc. >>> >>> I think something is wrong with git format-patch here. >> >> In which way? > > It is kind of hard to evaluate the changes when the diff is against unrelated files. 59% is greater than the default 50% renameLimit. Andreas.
diff --git a/posix/Makefile b/posix/Makefile index d5a6844c88..417f2cb4ca 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -93,6 +93,7 @@ tests := test-errno tstgetopt testfnm runtests runptests \ bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \ tst-pathconf tst-rxspencer-no-utf8 \ tst-fnmatch3 bug-regex36 \ + tst-fnmatch4 tst-fnmatch5 \ tst-posix_spawn-fd tst-posix_spawn-setsid \ tst-posix_fadvise tst-posix_fadvise64 \ tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \ @@ -168,7 +169,8 @@ $(objpfx)wordexp-tst.out: wordexp-tst.sh $(objpfx)wordexp-test endif LOCALES := cs_CZ.UTF-8 da_DK.ISO-8859-1 de_DE.ISO-8859-1 de_DE.UTF-8 \ - en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 + en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 \ + cs_CZ.ISO-8859-2 include ../gen-locales.mk $(objpfx)bug-regex1.out: $(gen-locales) diff --git a/posix/fnmatch.c b/posix/fnmatch.c index a9b762624f..2ef00f1e2f 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -53,7 +53,6 @@ we support a correct implementation only in glibc. */ #ifdef _LIBC # include "../locale/localeinfo.h" -# include "../locale/elem-hash.h" # include "../locale/coll-lookup.h" # include <shlib-compat.h> diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c index e298cac5dc..b25072ac1e 100644 --- a/posix/fnmatch_loop.c +++ b/posix/fnmatch_loop.c @@ -494,26 +494,12 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, { int32_t table_size; const int32_t *symb_table; -# if WIDE_CHAR_VERSION - char str[c1]; - unsigned int strcnt; -# else -# define str (startp + 1) -# endif const unsigned char *extra; int32_t idx; int32_t elem; - int32_t second; - int32_t hash; - # if WIDE_CHAR_VERSION - /* We have to convert the name to a single-byte - string. This is possible since the names - consist of ASCII characters and the internal - representation is UCS4. 
*/ - for (strcnt = 0; strcnt < c1; ++strcnt) - str[strcnt] = startp[1 + strcnt]; -#endif + int32_t *wextra; +# endif table_size = _NL_CURRENT_WORD (LC_COLLATE, @@ -525,71 +511,55 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - /* Locate the character in the hashing table. */ - hash = elem_hash (str, c1); - - idx = 0; - elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - && (c1 - == extra[symb_table[2 * elem + 1]]) - && memcmp (str, - &extra[symb_table[2 * elem - + 1] - + 1], c1) == 0) - { - /* Yep, this is the entry. */ - idx = symb_table[2 * elem + 1]; - idx += 1 + extra[idx]; - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } + for (elem = 0; elem < table_size; elem++) + if (symb_table[2 * elem] != 0) + { + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element. */ + idx += 1 + extra[idx]; +# if WIDE_CHAR_VERSION + /* Skip the byte sequence of the + collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + + wextra = (int32_t *) &extra[idx + 4]; + + if (/* Compare the length of the sequence. */ + c1 == wextra[0] + /* Compare the wide char sequence. */ + && memcmp (startp + 1, &wextra[1], + c1 * sizeof (UCHAR)) == 0) + /* Yep, this is the entry. */ + break; +# else + if (/* Compare the length of the sequence. */ + c1 == extra[idx] + /* Compare the byte sequence. */ + && memcmp (startp + 1, + &extra[idx + 1], c1) == 0) + /* Yep, this is the entry. */ + break; +# endif + } - if (symb_table[2 * elem] != 0) + if (elem < table_size) { /* Compare the byte sequence but only if this is not part of a range. */ -# if WIDE_CHAR_VERSION - int32_t *wextra; + if (! is_range - idx += 1 + extra[idx]; - /* Adjust for the alignment. 
*/ - idx = (idx + 3) & ~3; - - wextra = (int32_t *) &extra[idx + 4]; -# endif - - if (! is_range) - { # if WIDE_CHAR_VERSION - for (c1 = 0; - (int32_t) c1 < wextra[idx]; - ++c1) - if (n[c1] != wextra[1 + c1]) - break; - - if ((int32_t) c1 == wextra[idx]) - goto matched; + && memcmp (n, &wextra[1], + c1 * sizeof (UCHAR)) == 0 # else - for (c1 = 0; c1 < extra[idx]; ++c1) - if (n[c1] != extra[1 + c1]) - break; - - if (c1 == extra[idx]) - goto matched; + && memcmp (n, &extra[idx + 1], c1) == 0 # endif + ) + { + n += c1 - 1; + goto matched; } /* Get the collation sequence value. */ @@ -597,9 +567,9 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, # if WIDE_CHAR_VERSION cold = wextra[1 + wextra[idx]]; # else - /* Adjust for the alignment. */ idx += 1 + extra[idx]; - idx = (idx + 3) & ~4; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; cold = *((int32_t *) &extra[idx]); # endif @@ -609,10 +579,10 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, { /* No valid character. Match it as a single byte. */ - if (!is_range && *n == str[0]) + if (!is_range && *n == startp[1]) goto matched; - cold = str[0]; + cold = startp[1]; c = *p++; } else @@ -620,7 +590,6 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, } } else -# undef str #endif { c = FOLD (c); @@ -712,25 +681,11 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, { int32_t table_size; const int32_t *symb_table; -# if WIDE_CHAR_VERSION - char str[c1]; - unsigned int strcnt; -# else -# define str (startp + 1) -# endif const unsigned char *extra; int32_t idx; int32_t elem; - int32_t second; - int32_t hash; - # if WIDE_CHAR_VERSION - /* We have to convert the name to a single-byte - string. This is possible since the names - consist of ASCII characters and the internal - representation is UCS4. 
*/ - for (strcnt = 0; strcnt < c1; ++strcnt) - str[strcnt] = startp[1 + strcnt]; + int32_t *wextra; # endif table_size = @@ -743,71 +698,63 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - /* Locate the character in the hashing - table. */ - hash = elem_hash (str, c1); - - idx = 0; - elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - && (c1 - == extra[symb_table[2 * elem + 1]]) - && memcmp (str, - &extra[symb_table[2 * elem + 1] - + 1], c1) == 0) - { - /* Yep, this is the entry. */ - idx = symb_table[2 * elem + 1]; - idx += 1 + extra[idx]; - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - - if (symb_table[2 * elem] != 0) - { - /* Compare the byte sequence but only if - this is not part of a range. */ + for (elem = 0; elem < table_size; elem++) + if (symb_table[2 * elem] != 0) + { + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating + element. */ + idx += 1 + extra[idx]; # if WIDE_CHAR_VERSION - int32_t *wextra; - - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~4; - - wextra = (int32_t *) &extra[idx + 4]; + /* Skip the byte sequence of the + collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + + wextra = (int32_t *) &extra[idx + 4]; + + if (/* Compare the length of the + sequence. */ + c1 == wextra[0] + /* Compare the wide char sequence. */ + && memcmp (startp + 1, &wextra[1], + c1 * sizeof (int32_t)) == 0) + /* Yep, this is the entry. */ + break; +# else + if (/* Compare the length of the + sequence. */ + c1 == extra[idx] + /* Compare the byte sequence. */ + && memcmp (startp + 1, + &extra[idx + 1], c1) == 0) + /* Yep, this is the entry. 
*/ + break; # endif + } + + if (elem < table_size) + { /* Get the collation sequence value. */ is_seqval = 1; # if WIDE_CHAR_VERSION cend = wextra[1 + wextra[idx]]; # else - /* Adjust for the alignment. */ idx += 1 + extra[idx]; - idx = (idx + 3) & ~4; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; cend = *((int32_t *) &extra[idx]); # endif } - else if (symb_table[2 * elem] != 0 && c1 == 1) + else if (c1 == 1) { - cend = str[0]; + cend = startp[1]; c = *p++; } else return FNM_NOMATCH; } -# undef str } else { diff --git a/posix/tst-fnmatch4.c b/posix/tst-fnmatch4.c new file mode 100644 index 0000000000..9edeeb19db --- /dev/null +++ b/posix/tst-fnmatch4.c @@ -0,0 +1,50 @@ +/* Test for fnmatch handling of collating elements + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <stdio.h> +#include <locale.h> +#include <fnmatch.h> + +static int +do_test_locale (const char *locale) +{ + const char *pattern = "[[.ch.]]"; + + if (setlocale (LC_ALL, locale) == NULL) + { + printf ("could not set locale %s\n", locale); + return 1; + } + + if (fnmatch (pattern, "ch", 0) != 0) + { + printf ("%s didn't match in locale %s\n", pattern, locale); + return 1; + } + + return 0; +} + +static int +do_test (void) +{ + return (do_test_locale ("cs_CZ.ISO-8859-2") + || do_test_locale ("cs_CZ.UTF-8")); +} + +#include <support/test-driver.c> diff --git a/posix/tst-fnmatch5.c b/posix/tst-fnmatch5.c new file mode 100644 index 0000000000..c01abc9db5 --- /dev/null +++ b/posix/tst-fnmatch5.c @@ -0,0 +1,52 @@ +/* Test for fnmatch handling of collating elements + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fnmatch.h> +#include <locale.h> +#include <stdio.h> +#include <string.h> + +#define LENGTH 20000000 + +static char pattern[LENGTH + 7]; + +static int +do_test (void) +{ + if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) + { + puts ("could not set locale"); + return 1; + } + pattern[0] = '['; + pattern[1] = '['; + pattern[2] = '.'; + memset (pattern + 3, 'a', LENGTH); + pattern[LENGTH + 3] = '.'; + pattern[LENGTH + 4] = ']'; + pattern[LENGTH + 5] = ']'; + int ret = fnmatch (pattern, "a", 0); + if (ret == 0) + { + puts ("fnmatch returned 0 for invalid pattern"); + return 1; + } + return 0; +} + +#include <support/test-driver.c>