Message ID | 20210720023658.1278155-1-siddhesh@sourceware.org |
---|---|
State | New |
Headers | show |
Series | [v2] setlocale: Fail if iconv module for charset is not present [BZ #27996] | expand |
Ping! On 7/20/21 8:06 AM, Siddhesh Poyarekar via Libc-alpha wrote: > setlocale currently succeeds even if the requested locale uses a > charset that does not have a converter module installed. Check for > existence of the charset (either the one requested through the input > name or the one needed by the selected locale file) and fail if it > doesn't exist. > > The new test tst-invalid-charset verifies that loading test5 and test6 > locales fails because both locales have charsets without a converter, > viz. test5 and test6 respectively. Also, test6.c has been removed as > it was unused. > --- > Changes from v1: > - Find full transformation paths both ways instead of merely looking for > a FROM converter. > > locale/findlocale.c | 77 ++++++++++++----- > localedata/Makefile | 12 ++- > localedata/tests/test6.c | 137 ------------------------------- > localedata/tst-invalid-charset.c | 31 +++++++ > 4 files changed, 95 insertions(+), 162 deletions(-) > delete mode 100644 localedata/tests/test6.c > create mode 100644 localedata/tst-invalid-charset.c > > diff --git a/locale/findlocale.c b/locale/findlocale.c > index ab09122b0c..7ccc98cd8b 100644 > --- a/locale/findlocale.c > +++ b/locale/findlocale.c > @@ -98,6 +98,30 @@ valid_locale_name (const char *name) > return 1; > } > > +/* Return true if we have gconv modules to transform between the INTERNAL > + encoding and CODESET. 
*/ > +static bool > +codeset_has_module (const char *codeset) > +{ > + struct __gconv_step *steps; > + size_t nsteps; > + > + char *ccodeset = (char *) alloca (strlen (codeset) + 3); > + strip (ccodeset, codeset); > + > + if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0) > + != __GCONV_OK) > + return false; > + __gconv_close_transform (steps, nsteps); > + > + if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0) > + != __GCONV_OK) > + return false; > + __gconv_close_transform (steps, nsteps); > + > + return true; > +} > + > struct __locale_data * > _nl_find_locale (const char *locale_path, size_t locale_path_len, > int category, const char **name) > @@ -200,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, > /* Memory allocate problem. */ > return NULL; > > + /* The requested codeset does not have a converter, don't use it. */ > + if (codeset != NULL && !codeset_has_module (codeset)) > + return NULL; > + > /* If exactly this locale was already asked for we have an entry with > the complete name. */ > locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category], > @@ -248,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, > return NULL; > } > > + /* Get the codeset information from the locale file. 
*/ > + static const int codeset_idx[] = > + { > + [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET), > + [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET), > + [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET), > + [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET), > + [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET), > + [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET), > + [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET), > + [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET), > + [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET), > + [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET), > + [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET), > + [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET) > + }; > + const struct __locale_data *data; > + const char *locale_codeset; > + > + data = (const struct __locale_data *) locale_file->data; > + locale_codeset = (const char *) data->values[codeset_idx[category]].string; > + assert (locale_codeset != NULL); > + > + /* The locale codeset does not have a converter, don't use it. */ > + if (locale_codeset[0] != '\0' && !codeset_has_module (locale_codeset)) > + return NULL; > + > /* The LC_CTYPE category allows to check whether a locale is really > usable. If the locale name contains a charset name and the > charset name used in the locale (present in the LC_CTYPE data) is > @@ -256,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, > in the locale name. */ > if (codeset != NULL) > { > - /* Get the codeset information from the locale file. 
*/ > - static const int codeset_idx[] = > - { > - [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET), > - [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET), > - [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET), > - [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET), > - [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET), > - [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET), > - [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET), > - [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET), > - [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET), > - [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET), > - [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET), > - [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET) > - }; > - const struct __locale_data *data; > - const char *locale_codeset; > char *clocale_codeset; > char *ccodeset; > > - data = (const struct __locale_data *) locale_file->data; > - locale_codeset = > - (const char *) data->values[codeset_idx[category]].string; > - assert (locale_codeset != NULL); > /* Note the length of the allocated memory: +3 for up to two slashes > and the NUL byte. */ > clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3); > diff --git a/localedata/Makefile b/localedata/Makefile > index 14e04cd3c5..2af399ec51 100644 > --- a/localedata/Makefile > +++ b/localedata/Makefile > @@ -124,11 +124,13 @@ test-input := \ > test-input-data = $(addsuffix .in, $(test-input)) > test-output := $(foreach s, .out .xout, \ > $(addsuffix $s, $(basename $(test-input)))) > +# Note that tst-invalid-charset depends on test5 and test6 being locales that > +# do not have valid charset converters. 
> ld-test-names := test1 test2 test3 test4 test5 test6 test7 > ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \ > $(addsuffix .def,$(ld-test-names)) \ > $(addsuffix .ds,test5 test6) \ > - test6.c trans.def) > + trans.def) > > fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \ > y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21 > @@ -158,7 +160,7 @@ tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \ > tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \ > tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \ > tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \ > - tst-wctype tst-iconv-math-trans > + tst-wctype tst-iconv-math-trans tst-invalid-charset > tests-static = bug-setlocale1-static > tests += $(tests-static) > ifeq (yes,$(build-shared)) > @@ -401,7 +403,10 @@ $(objpfx)tst-langinfo-setlocale-static.out: tst-langinfo.sh \ > '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \ > $(evaluate-test) > > +# These tests depend on tst-locale because they use the locales compiled by > +# that test. 
> $(objpfx)tst-digits.out: $(objpfx)tst-locale.out > +$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out > $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES)) > endif > > @@ -461,7 +466,8 @@ $(objpfx)mtrace-tst-leaks.out: $(objpfx)tst-leaks.out > $(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \ > $(evaluate-test) > > -bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8 > +bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \ > + LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8 > bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only) > > $(objdir)/iconvdata/gconv-modules: > diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c > deleted file mode 100644 > index edb5fe4a5f..0000000000 > --- a/localedata/tests/test6.c > +++ /dev/null > @@ -1,137 +0,0 @@ > -/* Test program for character classes and mappings. > - Copyright (C) 1999-2021 Free Software Foundation, Inc. > - This file is part of the GNU C Library. > - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <https://www.gnu.org/licenses/>. 
*/ > - > -#include <ctype.h> > -#include <locale.h> > -#include <wchar.h> > - > - > -int > -main (void) > -{ > - const char lower[] = "abcdefghijklmnopqrstuvwxyz"; > - const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; > -#define LEN (sizeof (upper) - 1) > - const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz"; > - const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; > - int i; > - int result = 0; > - > - setlocale (LC_ALL, "test6"); > - > - for (i = 0; i < LEN; ++i) > - { > - /* Test basic table handling (basic == not more than 256 characters). > - The charmaps swaps the normal lower-upper case meaning of the > - ASCII characters used in the source code while the Unicode mapping > - in the repertoire map has the normal correspondents. This test > - shows the independence of the tables for `char' and `wchar_t' > - characters. */ > - > - if (islower (lower[i])) > - { > - printf ("islower ('%c') false\n", lower[i]); > - result = 1; > - } > - if (! isupper (lower[i])) > - { > - printf ("isupper ('%c') false\n", lower[i]); > - result = 1; > - } > - > - if (! islower (upper[i])) > - { > - printf ("islower ('%c') false\n", upper[i]); > - result = 1; > - } > - if (isupper (upper[i])) > - { > - printf ("isupper ('%c') false\n", upper[i]); > - result = 1; > - } > - > - if (toupper (lower[i]) != lower[i]) > - { > - printf ("toupper ('%c') false\n", lower[i]); > - result = 1; > - } > - if (tolower (lower[i]) != upper[i]) > - { > - printf ("tolower ('%c') false\n", lower[i]); > - result = 1; > - } > - > - if (tolower (upper[i]) != upper[i]) > - { > - printf ("tolower ('%c') false\n", upper[i]); > - result = 1; > - } > - if (toupper (upper[i]) != lower[i]) > - { > - printf ("toupper ('%c') false\n", upper[i]); > - result = 1; > - } > - > - if (iswlower (wupper[i])) > - { > - printf ("iswlower (L'%c') false\n", upper[i]); > - result = 1; > - } > - if (! 
iswupper (wupper[i])) > - { > - printf ("iswupper (L'%c') false\n", upper[i]); > - result = 1; > - } > - > - if (iswupper (wlower[i])) > - { > - printf ("iswupper (L'%c') false\n", lower[i]); > - result = 1; > - } > - if (! iswlower (wlower[i])) > - { > - printf ("iswlower (L'%c') false\n", lower[i]); > - result = 1; > - } > - > - if (towupper (wlower[i]) != wupper[i]) > - { > - printf ("towupper ('%c') false\n", lower[i]); > - result = 1; > - } > - if (towlower (wlower[i]) != wlower[i]) > - { > - printf ("towlower ('%c') false\n", lower[i]); > - result = 1; > - } > - > - if (towlower (wupper[i]) != wlower[i]) > - { > - printf ("towlower ('%c') false\n", upper[i]); > - result = 1; > - } > - if (towupper (wupper[i]) != wupper[i]) > - { > - printf ("towupper ('%c') false\n", upper[i]); > - result = 1; > - } > - } > - > - return result; > -} > diff --git a/localedata/tst-invalid-charset.c b/localedata/tst-invalid-charset.c > new file mode 100644 > index 0000000000..46a5198c66 > --- /dev/null > +++ b/localedata/tst-invalid-charset.c > @@ -0,0 +1,31 @@ > +/* Test program to verify that setlocale fails for charsets that do not have a > + converter. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#include <ctype.h> > +#include <locale.h> > +#include <wchar.h> > + > + > +int > +main (void) > +{ > + /* Fail if setlocale succeeds for any of these locales. */ > + return (setlocale (LC_ALL, "test5") != NULL > + || setlocale (LC_ALL, "test6") != NULL); > +} >
Ping! On 8/11/21 1:12 PM, Siddhesh Poyarekar via Libc-alpha wrote: > Ping! > > On 7/20/21 8:06 AM, Siddhesh Poyarekar via Libc-alpha wrote: >> setlocale currently succeeds even if the requested locale uses a >> charset that does not have a converter module installed. Check for >> existence of the charset (either the one requested through the input >> name or the one needed by the selected locale file) and fail if it >> doesn't exist. >> >> The new test tst-invalid-charset verifies that loading test5 and test6 >> locales fails because both locales have charsets without a converter, >> viz. test5 and test6 respectively. Also, test6.c has been removed as >> it was unused. >> --- >> Changes from v1: >> - Find full transformation paths both ways instead of merely looking for >> a FROM converter. >> >> locale/findlocale.c | 77 ++++++++++++----- >> localedata/Makefile | 12 ++- >> localedata/tests/test6.c | 137 ------------------------------- >> localedata/tst-invalid-charset.c | 31 +++++++ >> 4 files changed, 95 insertions(+), 162 deletions(-) >> delete mode 100644 localedata/tests/test6.c >> create mode 100644 localedata/tst-invalid-charset.c >> >> diff --git a/locale/findlocale.c b/locale/findlocale.c >> index ab09122b0c..7ccc98cd8b 100644 >> --- a/locale/findlocale.c >> +++ b/locale/findlocale.c >> @@ -98,6 +98,30 @@ valid_locale_name (const char *name) >> return 1; >> } >> +/* Return true if we have gconv modules to transform between the >> INTERNAL >> + encoding and CODESET. 
*/ >> +static bool >> +codeset_has_module (const char *codeset) >> +{ >> + struct __gconv_step *steps; >> + size_t nsteps; >> + >> + char *ccodeset = (char *) alloca (strlen (codeset) + 3); >> + strip (ccodeset, codeset); >> + >> + if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0) >> + != __GCONV_OK) >> + return false; >> + __gconv_close_transform (steps, nsteps); >> + >> + if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0) >> + != __GCONV_OK) >> + return false; >> + __gconv_close_transform (steps, nsteps); >> + >> + return true; >> +} >> + >> struct __locale_data * >> _nl_find_locale (const char *locale_path, size_t locale_path_len, >> int category, const char **name) >> @@ -200,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t >> locale_path_len, >> /* Memory allocate problem. */ >> return NULL; >> + /* The requested codeset does not have a converter, don't use it. */ >> + if (codeset != NULL && !codeset_has_module (codeset)) >> + return NULL; >> + >> /* If exactly this locale was already asked for we have an entry with >> the complete name. */ >> locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category], >> @@ -248,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t >> locale_path_len, >> return NULL; >> } >> + /* Get the codeset information from the locale file. 
*/ >> + static const int codeset_idx[] = >> + { >> + [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET), >> + [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET), >> + [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET), >> + [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET), >> + [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET), >> + [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET), >> + [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET), >> + [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET), >> + [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET), >> + [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET), >> + [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET), >> + [__LC_IDENTIFICATION] = _NL_ITEM_INDEX >> (_NL_IDENTIFICATION_CODESET) >> + }; >> + const struct __locale_data *data; >> + const char *locale_codeset; >> + >> + data = (const struct __locale_data *) locale_file->data; >> + locale_codeset = (const char *) >> data->values[codeset_idx[category]].string; >> + assert (locale_codeset != NULL); >> + >> + /* The locale codeset does not have a converter, don't use it. */ >> + if (locale_codeset[0] != '\0' && !codeset_has_module (locale_codeset)) >> + return NULL; >> + >> /* The LC_CTYPE category allows to check whether a locale is really >> usable. If the locale name contains a charset name and the >> charset name used in the locale (present in the LC_CTYPE data) is >> @@ -256,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t >> locale_path_len, >> in the locale name. */ >> if (codeset != NULL) >> { >> - /* Get the codeset information from the locale file. 
*/ >> - static const int codeset_idx[] = >> - { >> - [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET), >> - [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET), >> - [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET), >> - [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET), >> - [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET), >> - [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET), >> - [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET), >> - [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET), >> - [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET), >> - [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET), >> - [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET), >> - [__LC_IDENTIFICATION] = _NL_ITEM_INDEX >> (_NL_IDENTIFICATION_CODESET) >> - }; >> - const struct __locale_data *data; >> - const char *locale_codeset; >> char *clocale_codeset; >> char *ccodeset; >> - data = (const struct __locale_data *) locale_file->data; >> - locale_codeset = >> - (const char *) data->values[codeset_idx[category]].string; >> - assert (locale_codeset != NULL); >> /* Note the length of the allocated memory: +3 for up to two >> slashes >> and the NUL byte. */ >> clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3); >> diff --git a/localedata/Makefile b/localedata/Makefile >> index 14e04cd3c5..2af399ec51 100644 >> --- a/localedata/Makefile >> +++ b/localedata/Makefile >> @@ -124,11 +124,13 @@ test-input := \ >> test-input-data = $(addsuffix .in, $(test-input)) >> test-output := $(foreach s, .out .xout, \ >> $(addsuffix $s, $(basename $(test-input)))) >> +# Note that tst-invalid-charset depends on test5 and test6 being >> locales that >> +# do not have valid charset converters. 
>> ld-test-names := test1 test2 test3 test4 test5 test6 test7 >> ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \ >> $(addsuffix .def,$(ld-test-names)) \ >> $(addsuffix .ds,test5 test6) \ >> - test6.c trans.def) >> + trans.def) >> fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \ >> y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21 >> @@ -158,7 +160,7 @@ tests = $(locale_test_suite) tst-digits >> tst-setlocale bug-iconv-trans \ >> tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 >> tst-mbswcs5 \ >> tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \ >> tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 >> tst-setlocale3 \ >> - tst-wctype tst-iconv-math-trans >> + tst-wctype tst-iconv-math-trans tst-invalid-charset >> tests-static = bug-setlocale1-static >> tests += $(tests-static) >> ifeq (yes,$(build-shared)) >> @@ -401,7 +403,10 @@ $(objpfx)tst-langinfo-setlocale-static.out: >> tst-langinfo.sh \ >> '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \ >> $(evaluate-test) >> +# These tests depend on tst-locale because they use the locales >> compiled by >> +# that test. 
>> $(objpfx)tst-digits.out: $(objpfx)tst-locale.out >> +$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out >> $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES)) >> endif >> @@ -461,7 +466,8 @@ $(objpfx)mtrace-tst-leaks.out: $(objpfx)tst-leaks.out >> $(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \ >> $(evaluate-test) >> -bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8 >> +bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \ >> + LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8 >> bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only) >> $(objdir)/iconvdata/gconv-modules: >> diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c >> deleted file mode 100644 >> index edb5fe4a5f..0000000000 >> --- a/localedata/tests/test6.c >> +++ /dev/null >> @@ -1,137 +0,0 @@ >> -/* Test program for character classes and mappings. >> - Copyright (C) 1999-2021 Free Software Foundation, Inc. >> - This file is part of the GNU C Library. >> - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. >> - >> - The GNU C Library is free software; you can redistribute it and/or >> - modify it under the terms of the GNU Lesser General Public >> - License as published by the Free Software Foundation; either >> - version 2.1 of the License, or (at your option) any later version. >> - >> - The GNU C Library is distributed in the hope that it will be useful, >> - but WITHOUT ANY WARRANTY; without even the implied warranty of >> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> - Lesser General Public License for more details. >> - >> - You should have received a copy of the GNU Lesser General Public >> - License along with the GNU C Library; if not, see >> - <https://www.gnu.org/licenses/>. 
*/ >> - >> -#include <ctype.h> >> -#include <locale.h> >> -#include <wchar.h> >> - >> - >> -int >> -main (void) >> -{ >> - const char lower[] = "abcdefghijklmnopqrstuvwxyz"; >> - const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; >> -#define LEN (sizeof (upper) - 1) >> - const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz"; >> - const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; >> - int i; >> - int result = 0; >> - >> - setlocale (LC_ALL, "test6"); >> - >> - for (i = 0; i < LEN; ++i) >> - { >> - /* Test basic table handling (basic == not more than 256 >> characters). >> - The charmaps swaps the normal lower-upper case meaning of the >> - ASCII characters used in the source code while the Unicode mapping >> - in the repertoire map has the normal correspondents. This test >> - shows the independence of the tables for `char' and `wchar_t' >> - characters. */ >> - >> - if (islower (lower[i])) >> - { >> - printf ("islower ('%c') false\n", lower[i]); >> - result = 1; >> - } >> - if (! isupper (lower[i])) >> - { >> - printf ("isupper ('%c') false\n", lower[i]); >> - result = 1; >> - } >> - >> - if (! islower (upper[i])) >> - { >> - printf ("islower ('%c') false\n", upper[i]); >> - result = 1; >> - } >> - if (isupper (upper[i])) >> - { >> - printf ("isupper ('%c') false\n", upper[i]); >> - result = 1; >> - } >> - >> - if (toupper (lower[i]) != lower[i]) >> - { >> - printf ("toupper ('%c') false\n", lower[i]); >> - result = 1; >> - } >> - if (tolower (lower[i]) != upper[i]) >> - { >> - printf ("tolower ('%c') false\n", lower[i]); >> - result = 1; >> - } >> - >> - if (tolower (upper[i]) != upper[i]) >> - { >> - printf ("tolower ('%c') false\n", upper[i]); >> - result = 1; >> - } >> - if (toupper (upper[i]) != lower[i]) >> - { >> - printf ("toupper ('%c') false\n", upper[i]); >> - result = 1; >> - } >> - >> - if (iswlower (wupper[i])) >> - { >> - printf ("iswlower (L'%c') false\n", upper[i]); >> - result = 1; >> - } >> - if (! 
iswupper (wupper[i])) >> - { >> - printf ("iswupper (L'%c') false\n", upper[i]); >> - result = 1; >> - } >> - >> - if (iswupper (wlower[i])) >> - { >> - printf ("iswupper (L'%c') false\n", lower[i]); >> - result = 1; >> - } >> - if (! iswlower (wlower[i])) >> - { >> - printf ("iswlower (L'%c') false\n", lower[i]); >> - result = 1; >> - } >> - >> - if (towupper (wlower[i]) != wupper[i]) >> - { >> - printf ("towupper ('%c') false\n", lower[i]); >> - result = 1; >> - } >> - if (towlower (wlower[i]) != wlower[i]) >> - { >> - printf ("towlower ('%c') false\n", lower[i]); >> - result = 1; >> - } >> - >> - if (towlower (wupper[i]) != wlower[i]) >> - { >> - printf ("towlower ('%c') false\n", upper[i]); >> - result = 1; >> - } >> - if (towupper (wupper[i]) != wupper[i]) >> - { >> - printf ("towupper ('%c') false\n", upper[i]); >> - result = 1; >> - } >> - } >> - >> - return result; >> -} >> diff --git a/localedata/tst-invalid-charset.c >> b/localedata/tst-invalid-charset.c >> new file mode 100644 >> index 0000000000..46a5198c66 >> --- /dev/null >> +++ b/localedata/tst-invalid-charset.c >> @@ -0,0 +1,31 @@ >> +/* Test program to verify that setlocale fails for charsets that do >> not have a >> + converter. >> + Copyright (C) 2021 Free Software Foundation, Inc. >> + This file is part of the GNU C Library. >> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. 
>> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#include <ctype.h> >> +#include <locale.h> >> +#include <wchar.h> >> + >> + >> +int >> +main (void) >> +{ >> + /* Fail if setlocale succeeds for any of these locales. */ >> + return (setlocale (LC_ALL, "test5") != NULL >> + || setlocale (LC_ALL, "test6") != NULL); >> +} >> >
Ping! On 8/17/21 08:28, Siddhesh Poyarekar wrote: > Ping! > > On 8/11/21 1:12 PM, Siddhesh Poyarekar via Libc-alpha wrote: >> Ping! >> >> On 7/20/21 8:06 AM, Siddhesh Poyarekar via Libc-alpha wrote: >>> setlocale currently succeeds even if the requested locale uses a >>> charset that does not have a converter module installed. Check for >>> existence of the charset (either the one requested through the input >>> name or the one needed by the selected locale file) and fail if it >>> doesn't exist. >>> >>> The new test tst-invalid-charset verifies that loading test5 and test6 >>> locales fails because both locales have charsets without a converter, >>> viz. test5 and test6 respectively. Also, test6.c has been removed as >>> it was unused. >>> --- >>> Changes from v1: >>> - Find full transformation paths both ways instead of merely looking for >>> a FROM converter. >>> >>> locale/findlocale.c | 77 ++++++++++++----- >>> localedata/Makefile | 12 ++- >>> localedata/tests/test6.c | 137 ------------------------------- >>> localedata/tst-invalid-charset.c | 31 +++++++ >>> 4 files changed, 95 insertions(+), 162 deletions(-) >>> delete mode 100644 localedata/tests/test6.c >>> create mode 100644 localedata/tst-invalid-charset.c >>> >>> diff --git a/locale/findlocale.c b/locale/findlocale.c >>> index ab09122b0c..7ccc98cd8b 100644 >>> --- a/locale/findlocale.c >>> +++ b/locale/findlocale.c >>> @@ -98,6 +98,30 @@ valid_locale_name (const char *name) >>> return 1; >>> } >>> +/* Return true if we have gconv modules to transform between the >>> INTERNAL >>> + encoding and CODESET. 
*/ >>> +static bool >>> +codeset_has_module (const char *codeset) >>> +{ >>> + struct __gconv_step *steps; >>> + size_t nsteps; >>> + >>> + char *ccodeset = (char *) alloca (strlen (codeset) + 3); >>> + strip (ccodeset, codeset); >>> + >>> + if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0) >>> + != __GCONV_OK) >>> + return false; >>> + __gconv_close_transform (steps, nsteps); >>> + >>> + if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0) >>> + != __GCONV_OK) >>> + return false; >>> + __gconv_close_transform (steps, nsteps); >>> + >>> + return true; >>> +} >>> + >>> struct __locale_data * >>> _nl_find_locale (const char *locale_path, size_t locale_path_len, >>> int category, const char **name) >>> @@ -200,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t >>> locale_path_len, >>> /* Memory allocate problem. */ >>> return NULL; >>> + /* The requested codeset does not have a converter, don't use it. */ >>> + if (codeset != NULL && !codeset_has_module (codeset)) >>> + return NULL; >>> + >>> /* If exactly this locale was already asked for we have an entry >>> with >>> the complete name. */ >>> locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category], >>> @@ -248,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t >>> locale_path_len, >>> return NULL; >>> } >>> + /* Get the codeset information from the locale file. 
*/ >>> + static const int codeset_idx[] = >>> + { >>> + [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET), >>> + [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET), >>> + [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET), >>> + [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET), >>> + [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET), >>> + [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET), >>> + [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET), >>> + [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET), >>> + [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET), >>> + [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET), >>> + [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET), >>> + [__LC_IDENTIFICATION] = _NL_ITEM_INDEX >>> (_NL_IDENTIFICATION_CODESET) >>> + }; >>> + const struct __locale_data *data; >>> + const char *locale_codeset; >>> + >>> + data = (const struct __locale_data *) locale_file->data; >>> + locale_codeset = (const char *) >>> data->values[codeset_idx[category]].string; >>> + assert (locale_codeset != NULL); >>> + >>> + /* The locale codeset does not have a converter, don't use it. */ >>> + if (locale_codeset[0] != '\0' && !codeset_has_module >>> (locale_codeset)) >>> + return NULL; >>> + >>> /* The LC_CTYPE category allows to check whether a locale is really >>> usable. If the locale name contains a charset name and the >>> charset name used in the locale (present in the LC_CTYPE data) is >>> @@ -256,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t >>> locale_path_len, >>> in the locale name. */ >>> if (codeset != NULL) >>> { >>> - /* Get the codeset information from the locale file. 
*/ >>> - static const int codeset_idx[] = >>> - { >>> - [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET), >>> - [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET), >>> - [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET), >>> - [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET), >>> - [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET), >>> - [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET), >>> - [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET), >>> - [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET), >>> - [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET), >>> - [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET), >>> - [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET), >>> - [__LC_IDENTIFICATION] = _NL_ITEM_INDEX >>> (_NL_IDENTIFICATION_CODESET) >>> - }; >>> - const struct __locale_data *data; >>> - const char *locale_codeset; >>> char *clocale_codeset; >>> char *ccodeset; >>> - data = (const struct __locale_data *) locale_file->data; >>> - locale_codeset = >>> - (const char *) data->values[codeset_idx[category]].string; >>> - assert (locale_codeset != NULL); >>> /* Note the length of the allocated memory: +3 for up to two >>> slashes >>> and the NUL byte. */ >>> clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3); >>> diff --git a/localedata/Makefile b/localedata/Makefile >>> index 14e04cd3c5..2af399ec51 100644 >>> --- a/localedata/Makefile >>> +++ b/localedata/Makefile >>> @@ -124,11 +124,13 @@ test-input := \ >>> test-input-data = $(addsuffix .in, $(test-input)) >>> test-output := $(foreach s, .out .xout, \ >>> $(addsuffix $s, $(basename $(test-input)))) >>> +# Note that tst-invalid-charset depends on test5 and test6 being >>> locales that >>> +# do not have valid charset converters. 
>>> ld-test-names := test1 test2 test3 test4 test5 test6 test7 >>> ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \ >>> $(addsuffix .def,$(ld-test-names)) \ >>> $(addsuffix .ds,test5 test6) \ >>> - test6.c trans.def) >>> + trans.def) >>> fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \ >>> y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21 >>> @@ -158,7 +160,7 @@ tests = $(locale_test_suite) tst-digits >>> tst-setlocale bug-iconv-trans \ >>> tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 >>> tst-mbswcs5 \ >>> tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \ >>> tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 >>> tst-setlocale3 \ >>> - tst-wctype tst-iconv-math-trans >>> + tst-wctype tst-iconv-math-trans tst-invalid-charset >>> tests-static = bug-setlocale1-static >>> tests += $(tests-static) >>> ifeq (yes,$(build-shared)) >>> @@ -401,7 +403,10 @@ $(objpfx)tst-langinfo-setlocale-static.out: >>> tst-langinfo.sh \ >>> '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \ >>> $(evaluate-test) >>> +# These tests depend on tst-locale because they use the locales >>> compiled by >>> +# that test. 
>>> $(objpfx)tst-digits.out: $(objpfx)tst-locale.out >>> +$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out >>> $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES)) >>> endif >>> @@ -461,7 +466,8 @@ $(objpfx)mtrace-tst-leaks.out: >>> $(objpfx)tst-leaks.out >>> $(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \ >>> $(evaluate-test) >>> -bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8 >>> +bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \ >>> + LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8 >>> bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only) >>> $(objdir)/iconvdata/gconv-modules: >>> diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c >>> deleted file mode 100644 >>> index edb5fe4a5f..0000000000 >>> --- a/localedata/tests/test6.c >>> +++ /dev/null >>> @@ -1,137 +0,0 @@ >>> -/* Test program for character classes and mappings. >>> - Copyright (C) 1999-2021 Free Software Foundation, Inc. >>> - This file is part of the GNU C Library. >>> - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. >>> - >>> - The GNU C Library is free software; you can redistribute it and/or >>> - modify it under the terms of the GNU Lesser General Public >>> - License as published by the Free Software Foundation; either >>> - version 2.1 of the License, or (at your option) any later version. >>> - >>> - The GNU C Library is distributed in the hope that it will be useful, >>> - but WITHOUT ANY WARRANTY; without even the implied warranty of >>> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>> - Lesser General Public License for more details. >>> - >>> - You should have received a copy of the GNU Lesser General Public >>> - License along with the GNU C Library; if not, see >>> - <https://www.gnu.org/licenses/>. 
*/ >>> - >>> -#include <ctype.h> >>> -#include <locale.h> >>> -#include <wchar.h> >>> - >>> - >>> -int >>> -main (void) >>> -{ >>> - const char lower[] = "abcdefghijklmnopqrstuvwxyz"; >>> - const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; >>> -#define LEN (sizeof (upper) - 1) >>> - const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz"; >>> - const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; >>> - int i; >>> - int result = 0; >>> - >>> - setlocale (LC_ALL, "test6"); >>> - >>> - for (i = 0; i < LEN; ++i) >>> - { >>> - /* Test basic table handling (basic == not more than 256 >>> characters). >>> - The charmaps swaps the normal lower-upper case meaning of the >>> - ASCII characters used in the source code while the Unicode mapping >>> - in the repertoire map has the normal correspondents. This test >>> - shows the independence of the tables for `char' and `wchar_t' >>> - characters. */ >>> - >>> - if (islower (lower[i])) >>> - { >>> - printf ("islower ('%c') false\n", lower[i]); >>> - result = 1; >>> - } >>> - if (! isupper (lower[i])) >>> - { >>> - printf ("isupper ('%c') false\n", lower[i]); >>> - result = 1; >>> - } >>> - >>> - if (! 
islower (upper[i])) >>> - { >>> - printf ("islower ('%c') false\n", upper[i]); >>> - result = 1; >>> - } >>> - if (isupper (upper[i])) >>> - { >>> - printf ("isupper ('%c') false\n", upper[i]); >>> - result = 1; >>> - } >>> - >>> - if (toupper (lower[i]) != lower[i]) >>> - { >>> - printf ("toupper ('%c') false\n", lower[i]); >>> - result = 1; >>> - } >>> - if (tolower (lower[i]) != upper[i]) >>> - { >>> - printf ("tolower ('%c') false\n", lower[i]); >>> - result = 1; >>> - } >>> - >>> - if (tolower (upper[i]) != upper[i]) >>> - { >>> - printf ("tolower ('%c') false\n", upper[i]); >>> - result = 1; >>> - } >>> - if (toupper (upper[i]) != lower[i]) >>> - { >>> - printf ("toupper ('%c') false\n", upper[i]); >>> - result = 1; >>> - } >>> - >>> - if (iswlower (wupper[i])) >>> - { >>> - printf ("iswlower (L'%c') false\n", upper[i]); >>> - result = 1; >>> - } >>> - if (! iswupper (wupper[i])) >>> - { >>> - printf ("iswupper (L'%c') false\n", upper[i]); >>> - result = 1; >>> - } >>> - >>> - if (iswupper (wlower[i])) >>> - { >>> - printf ("iswupper (L'%c') false\n", lower[i]); >>> - result = 1; >>> - } >>> - if (! 
iswlower (wlower[i])) >>> - { >>> - printf ("iswlower (L'%c') false\n", lower[i]); >>> - result = 1; >>> - } >>> - >>> - if (towupper (wlower[i]) != wupper[i]) >>> - { >>> - printf ("towupper ('%c') false\n", lower[i]); >>> - result = 1; >>> - } >>> - if (towlower (wlower[i]) != wlower[i]) >>> - { >>> - printf ("towlower ('%c') false\n", lower[i]); >>> - result = 1; >>> - } >>> - >>> - if (towlower (wupper[i]) != wlower[i]) >>> - { >>> - printf ("towlower ('%c') false\n", upper[i]); >>> - result = 1; >>> - } >>> - if (towupper (wupper[i]) != wupper[i]) >>> - { >>> - printf ("towupper ('%c') false\n", upper[i]); >>> - result = 1; >>> - } >>> - } >>> - >>> - return result; >>> -} >>> diff --git a/localedata/tst-invalid-charset.c >>> b/localedata/tst-invalid-charset.c >>> new file mode 100644 >>> index 0000000000..46a5198c66 >>> --- /dev/null >>> +++ b/localedata/tst-invalid-charset.c >>> @@ -0,0 +1,31 @@ >>> +/* Test program to verify that setlocale fails for charsets that do >>> not have a >>> + converter. >>> + Copyright (C) 2021 Free Software Foundation, Inc. >>> + This file is part of the GNU C Library. >>> + >>> + The GNU C Library is free software; you can redistribute it and/or >>> + modify it under the terms of the GNU Lesser General Public >>> + License as published by the Free Software Foundation; either >>> + version 2.1 of the License, or (at your option) any later version. >>> + >>> + The GNU C Library is distributed in the hope that it will be useful, >>> + but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>> + Lesser General Public License for more details. >>> + >>> + You should have received a copy of the GNU Lesser General Public >>> + License along with the GNU C Library; if not, see >>> + <https://www.gnu.org/licenses/>. 
*/ >>> + >>> +#include <ctype.h> >>> +#include <locale.h> >>> +#include <wchar.h> >>> + >>> + >>> +int >>> +main (void) >>> +{ >>> + /* Fail if setlocale succeeds for any of these locales. */ >>> + return (setlocale (LC_ALL, "test5") != NULL >>> + || setlocale (LC_ALL, "test6") != NULL); >>> +} >>> >> >
diff --git a/locale/findlocale.c b/locale/findlocale.c index ab09122b0c..7ccc98cd8b 100644 --- a/locale/findlocale.c +++ b/locale/findlocale.c @@ -98,6 +98,30 @@ valid_locale_name (const char *name) return 1; } +/* Return true if we have gconv modules to transform between the INTERNAL + encoding and CODESET. */ +static bool +codeset_has_module (const char *codeset) +{ + struct __gconv_step *steps; + size_t nsteps; + + char *ccodeset = (char *) alloca (strlen (codeset) + 3); + strip (ccodeset, codeset); + + if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0) + != __GCONV_OK) + return false; + __gconv_close_transform (steps, nsteps); + + if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0) + != __GCONV_OK) + return false; + __gconv_close_transform (steps, nsteps); + + return true; +} + struct __locale_data * _nl_find_locale (const char *locale_path, size_t locale_path_len, int category, const char **name) @@ -200,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, /* Memory allocate problem. */ return NULL; + /* The requested codeset does not have a converter, don't use it. */ + if (codeset != NULL && !codeset_has_module (codeset)) + return NULL; + /* If exactly this locale was already asked for we have an entry with the complete name. */ locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category], @@ -248,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, return NULL; } + /* Get the codeset information from the locale file. 
*/ + static const int codeset_idx[] = + { + [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET), + [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET), + [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET), + [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET), + [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET), + [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET), + [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET), + [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET), + [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET), + [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET), + [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET), + [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET) + }; + const struct __locale_data *data; + const char *locale_codeset; + + data = (const struct __locale_data *) locale_file->data; + locale_codeset = (const char *) data->values[codeset_idx[category]].string; + assert (locale_codeset != NULL); + + /* The locale codeset does not have a converter, don't use it. */ + if (locale_codeset[0] != '\0' && !codeset_has_module (locale_codeset)) + return NULL; + /* The LC_CTYPE category allows to check whether a locale is really usable. If the locale name contains a charset name and the charset name used in the locale (present in the LC_CTYPE data) is @@ -256,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, in the locale name. */ if (codeset != NULL) { - /* Get the codeset information from the locale file. 
*/ - static const int codeset_idx[] = - { - [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET), - [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET), - [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET), - [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET), - [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET), - [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET), - [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET), - [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET), - [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET), - [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET), - [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET), - [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET) - }; - const struct __locale_data *data; - const char *locale_codeset; char *clocale_codeset; char *ccodeset; - data = (const struct __locale_data *) locale_file->data; - locale_codeset = - (const char *) data->values[codeset_idx[category]].string; - assert (locale_codeset != NULL); /* Note the length of the allocated memory: +3 for up to two slashes and the NUL byte. */ clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3); diff --git a/localedata/Makefile b/localedata/Makefile index 14e04cd3c5..2af399ec51 100644 --- a/localedata/Makefile +++ b/localedata/Makefile @@ -124,11 +124,13 @@ test-input := \ test-input-data = $(addsuffix .in, $(test-input)) test-output := $(foreach s, .out .xout, \ $(addsuffix $s, $(basename $(test-input)))) +# Note that tst-invalid-charset depends on test5 and test6 being locales that +# do not have valid charset converters. 
ld-test-names := test1 test2 test3 test4 test5 test6 test7 ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \ $(addsuffix .def,$(ld-test-names)) \ $(addsuffix .ds,test5 test6) \ - test6.c trans.def) + trans.def) fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \ y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21 @@ -158,7 +160,7 @@ tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \ tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \ tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \ tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \ - tst-wctype tst-iconv-math-trans + tst-wctype tst-iconv-math-trans tst-invalid-charset tests-static = bug-setlocale1-static tests += $(tests-static) ifeq (yes,$(build-shared)) @@ -401,7 +403,10 @@ $(objpfx)tst-langinfo-setlocale-static.out: tst-langinfo.sh \ '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \ $(evaluate-test) +# These tests depend on tst-locale because they use the locales compiled by +# that test. $(objpfx)tst-digits.out: $(objpfx)tst-locale.out +$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES)) endif @@ -461,7 +466,8 @@ $(objpfx)mtrace-tst-leaks.out: $(objpfx)tst-leaks.out $(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \ $(evaluate-test) -bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8 +bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \ + LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8 bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only) $(objdir)/iconvdata/gconv-modules: diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c deleted file mode 100644 index edb5fe4a5f..0000000000 --- a/localedata/tests/test6.c +++ /dev/null @@ -1,137 +0,0 @@ -/* Test program for character classes and mappings. - Copyright (C) 1999-2021 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include <ctype.h> -#include <locale.h> -#include <wchar.h> - - -int -main (void) -{ - const char lower[] = "abcdefghijklmnopqrstuvwxyz"; - const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -#define LEN (sizeof (upper) - 1) - const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz"; - const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - int i; - int result = 0; - - setlocale (LC_ALL, "test6"); - - for (i = 0; i < LEN; ++i) - { - /* Test basic table handling (basic == not more than 256 characters). - The charmaps swaps the normal lower-upper case meaning of the - ASCII characters used in the source code while the Unicode mapping - in the repertoire map has the normal correspondents. This test - shows the independence of the tables for `char' and `wchar_t' - characters. */ - - if (islower (lower[i])) - { - printf ("islower ('%c') false\n", lower[i]); - result = 1; - } - if (! isupper (lower[i])) - { - printf ("isupper ('%c') false\n", lower[i]); - result = 1; - } - - if (! 
islower (upper[i])) - { - printf ("islower ('%c') false\n", upper[i]); - result = 1; - } - if (isupper (upper[i])) - { - printf ("isupper ('%c') false\n", upper[i]); - result = 1; - } - - if (toupper (lower[i]) != lower[i]) - { - printf ("toupper ('%c') false\n", lower[i]); - result = 1; - } - if (tolower (lower[i]) != upper[i]) - { - printf ("tolower ('%c') false\n", lower[i]); - result = 1; - } - - if (tolower (upper[i]) != upper[i]) - { - printf ("tolower ('%c') false\n", upper[i]); - result = 1; - } - if (toupper (upper[i]) != lower[i]) - { - printf ("toupper ('%c') false\n", upper[i]); - result = 1; - } - - if (iswlower (wupper[i])) - { - printf ("iswlower (L'%c') false\n", upper[i]); - result = 1; - } - if (! iswupper (wupper[i])) - { - printf ("iswupper (L'%c') false\n", upper[i]); - result = 1; - } - - if (iswupper (wlower[i])) - { - printf ("iswupper (L'%c') false\n", lower[i]); - result = 1; - } - if (! iswlower (wlower[i])) - { - printf ("iswlower (L'%c') false\n", lower[i]); - result = 1; - } - - if (towupper (wlower[i]) != wupper[i]) - { - printf ("towupper ('%c') false\n", lower[i]); - result = 1; - } - if (towlower (wlower[i]) != wlower[i]) - { - printf ("towlower ('%c') false\n", lower[i]); - result = 1; - } - - if (towlower (wupper[i]) != wlower[i]) - { - printf ("towlower ('%c') false\n", upper[i]); - result = 1; - } - if (towupper (wupper[i]) != wupper[i]) - { - printf ("towupper ('%c') false\n", upper[i]); - result = 1; - } - } - - return result; -} diff --git a/localedata/tst-invalid-charset.c b/localedata/tst-invalid-charset.c new file mode 100644 index 0000000000..46a5198c66 --- /dev/null +++ b/localedata/tst-invalid-charset.c @@ -0,0 +1,31 @@ +/* Test program to verify that setlocale fails for charsets that do not have a + converter. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <ctype.h> +#include <locale.h> +#include <wchar.h> + + +int +main (void) +{ + /* Fail if setlocale succeeds for any of these locales. */ + return (setlocale (LC_ALL, "test5") != NULL + || setlocale (LC_ALL, "test6") != NULL); +}