Message ID | 20240409124205.1512915-1-carlos@redhat.com |
---|---|
State | New |
Headers | show |
Series | [v2] locale: Handle loading a missing locale twice (Bug 14247) | expand |
On 09/04/24 09:41, Carlos O'Donell wrote: > Delay setting file->decided until the data has been successfully loaded > by _nl_load_locale(). If the function fails to load the data then we > must return and error and leave decided untouched to allow the caller to > attempt to load the data again at a later time. We should not set > decided to 1 early in the function since doing so may prevent attempting > to load it again. We want to try loading it again because that allows an > open to fail and set errno correctly. > > On the other side of this problem is that if we are called again with > the same inputs we will fetch the cached version of the object and carry > out no open syscalls and that fails to set errno so we must set errno to > ENOENT in that case. There is a second code path that has to be handled > where the name of the locale matches but the codeset doesn't match. > > These changes ensure that errno is correctly set on failure in all the > return paths in _nl_find_locale(). > > Adds tst-locale-loadlocale to cover the bug. > > No regressions on x86_64. > > Co-authored-by: Jeff Law <law@redhat.com> LGTM, thanks. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> > --- > gen-locales.mk | 13 +++++- > locale/findlocale.c | 19 +++++++-- > locale/loadlocale.c | 2 +- > localedata/Makefile | 4 ++ > localedata/gen-locale.sh | 24 ++++++++--- > localedata/tst-locale-loadlocale.c | 67 ++++++++++++++++++++++++++++++ > 6 files changed, 119 insertions(+), 10 deletions(-) > create mode 100644 localedata/tst-locale-loadlocale.c > > diff --git a/gen-locales.mk b/gen-locales.mk > index 9c523d2a05..b005a72866 100644 > --- a/gen-locales.mk > +++ b/gen-locales.mk > @@ -1,8 +1,19 @@ > # defines target $(gen-locales) that generates the locales given in $(LOCALES) > > LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^@ ]*\(@[^ ]*\)\?/\1\2/g') > +# The CHARMAPS dependency handling must be able to process: > +# 1. No character map e.g. eo, en_US > +# 2. Character maps e.g. en_US.UTF-8 > +# 3. Character maps with modifier e.g. tt_RU.UTF-8@iqtelif > +# This complicates the processing slightly so we do it in multiple edits, > +# the first captures the character map with the anchoring period while > +# the rest of the edits remove the period to get a valid file or adjust > +# the name to match the true name. > CHARMAPS := $(shell echo "$(LOCALES)" | \ > - sed -e 's/[^ .]*[.]\([^@ ]*\)\(@[^@ ]*\)*/\1/g' -e s/SJIS/SHIFT_JIS/g) > + sed -e 's/\([^ .]*\)\([^@ ]*\)\(@[^@ ]*\)*/\2/g' \ > + -e 's/^\./ /g' \ > + -e 's/ \./ /g' \ > + -e s/SJIS/SHIFT_JIS/g) > CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES)) > gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)) > > diff --git a/locale/findlocale.c b/locale/findlocale.c > index 8d6e4e33e3..43ff7201c1 100644 > --- a/locale/findlocale.c > +++ b/locale/findlocale.c > @@ -244,7 +244,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, > locale_file = locale_file->successor[cnt]; > > if (locale_file == NULL) > - return NULL; > + { > + /* If this is the second time we tried to load a failed > + locale then the locale_file value comes from the cache > + and we will not carry out any actual filesystem > + operations so we must set ENOENT here. */ > + __set_errno (ENOENT); > + return NULL; > + } > } > > /* The LC_CTYPE category allows to check whether a locale is really > @@ -291,8 +298,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, > if (__gconv_compare_alias (upstr (ccodeset, ccodeset), > upstr (clocale_codeset, > clocale_codeset)) != 0) > - /* The codesets are not identical, don't use the locale. */ > - return NULL; > + { > + /* The codesets are not identical, don't use the locale. > + If this is the second time we tried to load a locale > + whose codeset doesn't match then the result came from > + the cache and must set ENOENT here. */ > + __set_errno (ENOENT); > + return NULL; > + } > } > > /* Determine the locale name for which loading succeeded. This > diff --git a/locale/loadlocale.c b/locale/loadlocale.c > index 1e5f93e927..991c0591e9 100644 > --- a/locale/loadlocale.c > +++ b/locale/loadlocale.c > @@ -237,7 +237,6 @@ _nl_load_locale (struct loaded_l10nfile *file, int category) > int save_err; > int alloc = ld_mapped; > > - file->decided = 1; > file->data = NULL; > > fd = __open_nocancel (file->filename, O_RDONLY | O_CLOEXEC); > @@ -345,6 +344,7 @@ _nl_load_locale (struct loaded_l10nfile *file, int category) > newdata->alloc = alloc; > > file->data = newdata; > + file->decided = 1; > } > > void > diff --git a/localedata/Makefile b/localedata/Makefile > index 713e7aebad..6d0bac225b 100644 > --- a/localedata/Makefile > +++ b/localedata/Makefile > @@ -239,6 +239,7 @@ tests = \ > tst-iconv-emojis-trans \ > tst-iconv-math-trans \ > tst-leaks \ > + tst-locale-loadlocale \ > tst-mbswcs1 \ > tst-mbswcs2 \ > tst-mbswcs3 \ > @@ -325,6 +326,7 @@ LOCALES := \ > dsb_DE.UTF-8 \ > dz_BT.UTF-8 \ > en_GB.UTF-8 \ > + en_US \ > en_US.ANSI_X3.4-1968 \ > en_US.ISO-8859-1\ > en_US.UTF-8 \ > @@ -406,6 +408,8 @@ include ../gen-locales.mk > $(objpfx)tst-iconv-emojis-trans.out: $(gen-locales) > > $(objpfx)tst-iconv-math-trans.out: $(gen-locales) > +# tst-locale-loadlocale: Needs an en_US-named locale for the test. > +$(objpfx)tst-locale-loadlocale.out: $(gen-locales) > endif > > include ../Rules > diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh > index b8c422ad13..e400d02b82 100644 > --- a/localedata/gen-locale.sh > +++ b/localedata/gen-locale.sh > @@ -48,9 +48,9 @@ generate_locale () > } > > locfile=`echo $locfile|sed 's|.*/\([^/]*/LC_CTYPE\)|\1|'` > -locale=`echo $locfile|sed 's|\([^.]*\)[.].*/LC_CTYPE|\1|'` > -charmap=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|'` > -modifier=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'` > +locale=`echo $locfile|sed 's|\([^.]*\).*/LC_CTYPE|\1|'` > +charmap=`echo $locfile|sed -e 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|' -e 's|^\.||g'` > +modifier=`echo $locfile|sed 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'` > > echo "Generating locale $locale.$charmap: this might take a while..." > > @@ -63,10 +63,16 @@ echo "Generating locale $locale.$charmap: this might take a while..." > # you to develop in-progress locales. > flags="" > > +charmap_real="$charmap" > + > +# If no character map is specified then we fall back to UTF-8. > +if [ -z "$charmap" ]; then > + charmap_real="UTF-8" > +fi > + > # For SJIS the charmap is SHIFT_JIS. We just want the locale to have > # a slightly nicer name instead of using "*.SHIFT_SJIS", but that > # means we need a mapping here. > -charmap_real="$charmap" > if [ "$charmap" = "SJIS" ]; then > charmap_real="SHIFT_JIS" > fi > @@ -80,4 +86,12 @@ if [ "$charmap_real" = 'SHIFT_JIS' ] \ > flags="$flags --no-warnings=ascii" > fi > > -generate_locale $charmap_real $locale$modifier $locale.$charmap$modifier "$flags" > +# If the character map is not specified then we output a locale > +# with the just the name of the locale e.g. eo, en_US. This is > +# used for test locales that act as fallbacks. > +output_file="$locale.$charmap$modifier" > +if [ -z "$charmap" ]; then > + output_file="$locale" > +fi > + > +generate_locale $charmap_real $locale$modifier $output_file "$flags" > diff --git a/localedata/tst-locale-loadlocale.c b/localedata/tst-locale-loadlocale.c > new file mode 100644 > index 0000000000..64796e975d > --- /dev/null > +++ b/localedata/tst-locale-loadlocale.c > @@ -0,0 +1,67 @@ > +/* Test for locale loading error handling (Bug 14247) > + Copyright (C) 2024 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <stdio.h> > +#include <errno.h> > +#include <locale.h> > +#include <support/check.h> > +#include <support/support.h> > + > +static int > +do_test (void) > +{ > + locale_t loc = NULL; > + > + /* We must load an en_US-based locale for the final test to fail. > + The locale loading code will find the en_US locale already loaded > + but the codesets won't match. */ > + xsetlocale (LC_ALL, "en_US.UTF-8"); > + > + /* Call newlocale with an invalid locale. We expect the test system > + does not have "invalidlocale" locale. */ > + errno = 0; > + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0); > + TEST_VERIFY (loc == NULL && errno != 0); > + > + /* Call newlocale again with the same name. This triggers bug 14247 where > + the second call fails to set errno correctly. */ > + errno = 0; > + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0); > + TEST_VERIFY (loc == NULL && errno != 0); > + > + /* Next we attempt to load a locale that exists but whose codeset > + does not match the codeset of the locale on the system. > + This is more difficult to test but we rely on the fact that locale > + processing will normalize the locale name and attempt to open > + "en_US" with no codeset as a fallback and this will allow us to > + compare a loaded "en_US" locale with a UTF-8 codeset to the > + ficiticious "en_US.utf99" and get a codeset match failure. */ > + errno = 0; > + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0); > + TEST_VERIFY (loc == NULL && errno != 0); > + > + /* Call newlocale again with the same name. This triggers bug 14247 where > + the second call fails to set errno correctly. */ > + errno = 0; > + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0); > + TEST_VERIFY (loc == NULL && errno != 0); > + > + return 0; > +} > + > +#include <support/test-driver.c>
diff --git a/gen-locales.mk b/gen-locales.mk index 9c523d2a05..b005a72866 100644 --- a/gen-locales.mk +++ b/gen-locales.mk @@ -1,8 +1,19 @@ # defines target $(gen-locales) that generates the locales given in $(LOCALES) LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^@ ]*\(@[^ ]*\)\?/\1\2/g') +# The CHARMAPS dependency handling must be able to process: +# 1. No character map e.g. eo, en_US +# 2. Character maps e.g. en_US.UTF-8 +# 3. Character maps with modifier e.g. tt_RU.UTF-8@iqtelif +# This complicates the processing slightly so we do it in multiple edits, +# the first captures the character map with the anchoring period while +# the rest of the edits remove the period to get a valid file or adjust +# the name to match the true name. CHARMAPS := $(shell echo "$(LOCALES)" | \ - sed -e 's/[^ .]*[.]\([^@ ]*\)\(@[^@ ]*\)*/\1/g' -e s/SJIS/SHIFT_JIS/g) + sed -e 's/\([^ .]*\)\([^@ ]*\)\(@[^@ ]*\)*/\2/g' \ + -e 's/^\./ /g' \ + -e 's/ \./ /g' \ + -e s/SJIS/SHIFT_JIS/g) CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES)) gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)) diff --git a/locale/findlocale.c b/locale/findlocale.c index 8d6e4e33e3..43ff7201c1 100644 --- a/locale/findlocale.c +++ b/locale/findlocale.c @@ -244,7 +244,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, locale_file = locale_file->successor[cnt]; if (locale_file == NULL) - return NULL; + { + /* If this is the second time we tried to load a failed + locale then the locale_file value comes from the cache + and we will not carry out any actual filesystem + operations so we must set ENOENT here. */ + __set_errno (ENOENT); + return NULL; + } } /* The LC_CTYPE category allows to check whether a locale is really @@ -291,8 +298,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, if (__gconv_compare_alias (upstr (ccodeset, ccodeset), upstr (clocale_codeset, clocale_codeset)) != 0) - /* The codesets are not identical, don't use the locale. */ - return NULL; + { + /* The codesets are not identical, don't use the locale. + If this is the second time we tried to load a locale + whose codeset doesn't match then the result came from + the cache and must set ENOENT here. */ + __set_errno (ENOENT); + return NULL; + } } /* Determine the locale name for which loading succeeded. This diff --git a/locale/loadlocale.c b/locale/loadlocale.c index 1e5f93e927..991c0591e9 100644 --- a/locale/loadlocale.c +++ b/locale/loadlocale.c @@ -237,7 +237,6 @@ _nl_load_locale (struct loaded_l10nfile *file, int category) int save_err; int alloc = ld_mapped; - file->decided = 1; file->data = NULL; fd = __open_nocancel (file->filename, O_RDONLY | O_CLOEXEC); @@ -345,6 +344,7 @@ _nl_load_locale (struct loaded_l10nfile *file, int category) newdata->alloc = alloc; file->data = newdata; + file->decided = 1; } void diff --git a/localedata/Makefile b/localedata/Makefile index 713e7aebad..6d0bac225b 100644 --- a/localedata/Makefile +++ b/localedata/Makefile @@ -239,6 +239,7 @@ tests = \ tst-iconv-emojis-trans \ tst-iconv-math-trans \ tst-leaks \ + tst-locale-loadlocale \ tst-mbswcs1 \ tst-mbswcs2 \ tst-mbswcs3 \ @@ -325,6 +326,7 @@ LOCALES := \ dsb_DE.UTF-8 \ dz_BT.UTF-8 \ en_GB.UTF-8 \ + en_US \ en_US.ANSI_X3.4-1968 \ en_US.ISO-8859-1\ en_US.UTF-8 \ @@ -406,6 +408,8 @@ include ../gen-locales.mk $(objpfx)tst-iconv-emojis-trans.out: $(gen-locales) $(objpfx)tst-iconv-math-trans.out: $(gen-locales) +# tst-locale-loadlocale: Needs an en_US-named locale for the test. +$(objpfx)tst-locale-loadlocale.out: $(gen-locales) endif include ../Rules diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh index b8c422ad13..e400d02b82 100644 --- a/localedata/gen-locale.sh +++ b/localedata/gen-locale.sh @@ -48,9 +48,9 @@ generate_locale () } locfile=`echo $locfile|sed 's|.*/\([^/]*/LC_CTYPE\)|\1|'` -locale=`echo $locfile|sed 's|\([^.]*\)[.].*/LC_CTYPE|\1|'` -charmap=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|'` -modifier=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'` +locale=`echo $locfile|sed 's|\([^.]*\).*/LC_CTYPE|\1|'` +charmap=`echo $locfile|sed -e 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|' -e 's|^\.||g'` +modifier=`echo $locfile|sed 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'` echo "Generating locale $locale.$charmap: this might take a while..." @@ -63,10 +63,16 @@ echo "Generating locale $locale.$charmap: this might take a while..." # you to develop in-progress locales. flags="" +charmap_real="$charmap" + +# If no character map is specified then we fall back to UTF-8. +if [ -z "$charmap" ]; then + charmap_real="UTF-8" +fi + # For SJIS the charmap is SHIFT_JIS. We just want the locale to have # a slightly nicer name instead of using "*.SHIFT_SJIS", but that # means we need a mapping here. -charmap_real="$charmap" if [ "$charmap" = "SJIS" ]; then charmap_real="SHIFT_JIS" fi @@ -80,4 +86,12 @@ if [ "$charmap_real" = 'SHIFT_JIS' ] \ flags="$flags --no-warnings=ascii" fi -generate_locale $charmap_real $locale$modifier $locale.$charmap$modifier "$flags" +# If the character map is not specified then we output a locale +# with the just the name of the locale e.g. eo, en_US. This is +# used for test locales that act as fallbacks. +output_file="$locale.$charmap$modifier" +if [ -z "$charmap" ]; then + output_file="$locale" +fi + +generate_locale $charmap_real $locale$modifier $output_file "$flags" diff --git a/localedata/tst-locale-loadlocale.c b/localedata/tst-locale-loadlocale.c new file mode 100644 index 0000000000..64796e975d --- /dev/null +++ b/localedata/tst-locale-loadlocale.c @@ -0,0 +1,67 @@ +/* Test for locale loading error handling (Bug 14247) + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <errno.h> +#include <locale.h> +#include <support/check.h> +#include <support/support.h> + +static int +do_test (void) +{ + locale_t loc = NULL; + + /* We must load an en_US-based locale for the final test to fail. + The locale loading code will find the en_US locale already loaded + but the codesets won't match. */ + xsetlocale (LC_ALL, "en_US.UTF-8"); + + /* Call newlocale with an invalid locale. We expect the test system + does not have "invalidlocale" locale. */ + errno = 0; + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0); + TEST_VERIFY (loc == NULL && errno != 0); + + /* Call newlocale again with the same name. This triggers bug 14247 where + the second call fails to set errno correctly. */ + errno = 0; + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0); + TEST_VERIFY (loc == NULL && errno != 0); + + /* Next we attempt to load a locale that exists but whose codeset + does not match the codeset of the locale on the system. + This is more difficult to test but we rely on the fact that locale + processing will normalize the locale name and attempt to open + "en_US" with no codeset as a fallback and this will allow us to + compare a loaded "en_US" locale with a UTF-8 codeset to the + ficiticious "en_US.utf99" and get a codeset match failure. */ + errno = 0; + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0); + TEST_VERIFY (loc == NULL && errno != 0); + + /* Call newlocale again with the same name. This triggers bug 14247 where + the second call fails to set errno correctly. */ + errno = 0; + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0); + TEST_VERIFY (loc == NULL && errno != 0); + + return 0; +} + +#include <support/test-driver.c>