Message ID | mvm8qzar1de.fsf@suse.de |
---|---|
State | New |
Headers | show |
Series | iconv: Fix matching of multi-character transliterations (bug 31859) | expand |
On 6/12/24 4:50 AM, Andreas Schwab wrote: > Only return __GCONV_INCOMPLETE_INPUT for a partial match when the end of > the input buffer is reached. Otherwise it is a non-match, and other > patterns should be tried. Looking forward to v4. Fix looks correct to me. I have comments about the test case. > --- > iconv/Makefile | 13 ++++++++++ > iconv/gconv_trans.c | 2 +- > iconv/tst-translit-locale | 10 ++++++++ > iconv/tst-translit-mchar.c | 48 +++++++++++++++++++++++++++++++++++++ > iconv/tst-translit-mchar.sh | 48 +++++++++++++++++++++++++++++++++++++ > 5 files changed, 120 insertions(+), 1 deletion(-) > create mode 100644 iconv/tst-translit-locale > create mode 100644 iconv/tst-translit-mchar.c > create mode 100644 iconv/tst-translit-mchar.sh > > diff --git a/iconv/Makefile b/iconv/Makefile > index 63afc853ff..e93322da85 100644 > --- a/iconv/Makefile > +++ b/iconv/Makefile > @@ -57,6 +57,10 @@ tests = \ > tst-iconv-opt \ > # tests > > +test-srcs := \ > + tst-translit-mchar \ > + # test-srcs OK. Add test sources. > + > others = iconv_prog iconvconfig > install-others-programs = $(inst_bindir)/iconv > install-sbin = iconvconfig > @@ -73,6 +77,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) > ifeq ($(run-built-tests),yes) > xtests-special += $(objpfx)test-iconvconfig.out > tests-special += $(objpfx)tst-iconv_prog.out > +tests-special += $(objpfx)tst-translit-mchar.out OK. Add special test. > endif > > # Make a copy of the file because gconv module names are constructed > @@ -126,3 +131,11 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog > $(BASH) $< $(common-objdir) '$(test-wrapper-env)' \ > '$(run-program-env)' > $@; \ > $(evaluate-test) > + > +$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \ > + $(objpfx)tst-translit-mchar \ > + tst-translit-locale We should be able to use LOCALES to just compile tst-tranlit-locale? > + $(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \ > + '$(run-program-env)' '$(run-program-prefix-after-env)' \ > + $< > $@; \ > + $(evaluate-test) OK. > diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c > index 08b7a3f71d..44f0fd849a 100644 > --- a/iconv/gconv_trans.c > +++ b/iconv/gconv_trans.c > @@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step, > > /* Nothing found, continue searching. */ > } > - else if (cnt > 0) > + else if (cnt > 0 && winbuf + cnt == winbufend) OK. Looks correct to me, we not only need to match the prefix but must also be the right length. > /* This means that the input buffer contents matches a prefix of > an entry. Since we cannot match it unless we get more input, > we will tell the caller about it. */ > diff --git a/iconv/tst-translit-locale b/iconv/tst-translit-locale > new file mode 100644 > index 0000000000..712b08628a > --- /dev/null > +++ b/iconv/tst-translit-locale > @@ -0,0 +1,10 @@ > +# Test multi-character transliteration rule > + > +LC_CTYPE > +copy "POSIX" > + > +translit_start > +"ÄÄ" "AA" OK. New locale, that provides a transliteration. > +translit_end > + > +END LC_CTYPE > diff --git a/iconv/tst-translit-mchar.c b/iconv/tst-translit-mchar.c > new file mode 100644 > index 0000000000..72335976d6 > --- /dev/null > +++ b/iconv/tst-translit-mchar.c > @@ -0,0 +1,48 @@ > +/* Test multi-character transliterations. > + Copyright (C) 2024 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <locale.h> > +#include <iconv.h> > +#include <support/support.h> > +#include <support/check.h> > + > +static int > +do_test (void) > +{ > + iconv_t cd; > + /* An input sequence that shares a common prefix with a transliteration > + rule. */ > + char input[] = "\xc3\x84\xc3\x85"; For a simple rule test like this may we just use raw UTF-8? It makes it easier to read and understand. e.g. "ÄÅ" > + char *inptr = input; > + char outbuf[10]; > + char *outptr = outbuf; > + size_t inlen = sizeof (input), outlen = sizeof (outbuf); > + size_t n; > + > + xsetlocale (LC_CTYPE, "tst-translit"); > + > + cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); > + TEST_VERIFY (cd != (iconv_t) -1); > + > + /* This call used to loop infinitely. */ > + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); > + TEST_VERIFY (iconv_close (cd) == 0); > + return n == 0; > +} > + > +#include <support/test-driver.c> > diff --git a/iconv/tst-translit-mchar.sh b/iconv/tst-translit-mchar.sh > new file mode 100644 > index 0000000000..79efd6abc8 > --- /dev/null > +++ b/iconv/tst-translit-mchar.sh > @@ -0,0 +1,48 @@ > +#!/bin/sh > +# Testing of multi-character transliterations > +# Copyright (C) 2024 Free Software Foundation, Inc. > +# This file is part of the GNU C Library. > + > +# The GNU C Library is free software; you can redistribute it and/or > +# modify it under the terms of the GNU Lesser General Public > +# License as published by the Free Software Foundation; either > +# version 2.1 of the License, or (at your option) any later version. > + > +# The GNU C Library is distributed in the hope that it will be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +# Lesser General Public License for more details. > + > +# You should have received a copy of the GNU Lesser General Public > +# License along with the GNU C Library; if not, see > +# <https://www.gnu.org/licenses/>. > + > +set -e > + > +common_objpfx=$1 > +run_program_prefix_before_env=$2 > +run_program_env=$3 > +run_program_prefix_after_env=$4 > + > +# Generate data files. > +${run_program_prefix_before_env} \ > +${run_program_env} \ > +I18NPATH=../localedata \ > +${run_program_prefix_after_env} ${common_objpfx}locale/localedef \ > +--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$? > +if [ $ret -ne 1 ]; then > + echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)." > + exit 1 > +fi Can this just use LOCALES in iconv/Makefile? It should be able to compile the locale for you as all the other locales are compiled. > + > +set -x > + > +# Run the test. > +${run_program_prefix_before_env} \ > +${run_program_env} \ > +LOCPATH=${common_objpfx}iconv \ > +${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar Then you can get rid of those whole shell script and fold it up the iconv/Makefile as a test? > + > +# Local Variables: > +# mode:shell-script > +# End:
On Jun 20 2024, Carlos O'Donell wrote:
> We should be able to use LOCALES to just compile tst-tranlit-locale?
There are two issues here:
- LOCALES can only use locale sources from localedata/locales
- tst-tranlit-locale is a stub locale (only contains LC_CTYPE) that
localedef warns about, and gen-locale.sh does not accept that.
diff --git a/iconv/Makefile b/iconv/Makefile index 63afc853ff..e93322da85 100644 --- a/iconv/Makefile +++ b/iconv/Makefile @@ -57,6 +57,10 @@ tests = \ tst-iconv-opt \ # tests +test-srcs := \ + tst-translit-mchar \ + # test-srcs + others = iconv_prog iconvconfig install-others-programs = $(inst_bindir)/iconv install-sbin = iconvconfig @@ -73,6 +77,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) ifeq ($(run-built-tests),yes) xtests-special += $(objpfx)test-iconvconfig.out tests-special += $(objpfx)tst-iconv_prog.out +tests-special += $(objpfx)tst-translit-mchar.out endif # Make a copy of the file because gconv module names are constructed @@ -126,3 +131,11 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog $(BASH) $< $(common-objdir) '$(test-wrapper-env)' \ '$(run-program-env)' > $@; \ $(evaluate-test) + +$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \ + $(objpfx)tst-translit-mchar \ + tst-translit-locale + $(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \ + '$(run-program-env)' '$(run-program-prefix-after-env)' \ + $< > $@; \ + $(evaluate-test) diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c index 08b7a3f71d..44f0fd849a 100644 --- a/iconv/gconv_trans.c +++ b/iconv/gconv_trans.c @@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step, /* Nothing found, continue searching. */ } - else if (cnt > 0) + else if (cnt > 0 && winbuf + cnt == winbufend) /* This means that the input buffer contents matches a prefix of an entry. Since we cannot match it unless we get more input, we will tell the caller about it. */ diff --git a/iconv/tst-translit-locale b/iconv/tst-translit-locale new file mode 100644 index 0000000000..712b08628a --- /dev/null +++ b/iconv/tst-translit-locale @@ -0,0 +1,10 @@ +# Test multi-character transliteration rule + +LC_CTYPE +copy "POSIX" + +translit_start +"ÄÄ" "AA" +translit_end + +END LC_CTYPE diff --git a/iconv/tst-translit-mchar.c b/iconv/tst-translit-mchar.c new file mode 100644 index 0000000000..72335976d6 --- /dev/null +++ b/iconv/tst-translit-mchar.c @@ -0,0 +1,48 @@ +/* Test multi-character transliterations. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <locale.h> +#include <iconv.h> +#include <support/support.h> +#include <support/check.h> + +static int +do_test (void) +{ + iconv_t cd; + /* An input sequence that shares a common prefix with a transliteration + rule. */ + char input[] = "\xc3\x84\xc3\x85"; + char *inptr = input; + char outbuf[10]; + char *outptr = outbuf; + size_t inlen = sizeof (input), outlen = sizeof (outbuf); + size_t n; + + xsetlocale (LC_CTYPE, "tst-translit"); + + cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); + TEST_VERIFY (cd != (iconv_t) -1); + + /* This call used to loop infinitely. */ + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + TEST_VERIFY (iconv_close (cd) == 0); + return n == 0; +} + +#include <support/test-driver.c> diff --git a/iconv/tst-translit-mchar.sh b/iconv/tst-translit-mchar.sh new file mode 100644 index 0000000000..79efd6abc8 --- /dev/null +++ b/iconv/tst-translit-mchar.sh @@ -0,0 +1,48 @@ +#!/bin/sh +# Testing of multi-character transliterations +# Copyright (C) 2024 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <https://www.gnu.org/licenses/>. + +set -e + +common_objpfx=$1 +run_program_prefix_before_env=$2 +run_program_env=$3 +run_program_prefix_after_env=$4 + +# Generate data files. +${run_program_prefix_before_env} \ +${run_program_env} \ +I18NPATH=../localedata \ +${run_program_prefix_after_env} ${common_objpfx}locale/localedef \ +--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$? +if [ $ret -ne 1 ]; then + echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)." + exit 1 +fi + +set -x + +# Run the test. +${run_program_prefix_before_env} \ +${run_program_env} \ +LOCPATH=${common_objpfx}iconv \ +${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar + +# Local Variables: +# mode:shell-script +# End: