diff mbox

locale: Make _nl_C_locobj_ptr usable outside of libc

Message ID 583f7e67-628e-cd00-950f-7f2602ab3710@redhat.com
State New
Headers show

Commit Message

Florian Weimer Sept. 19, 2016, 2:33 p.m. UTC
On 09/16/2016 01:53 PM, Florian Weimer wrote:
> I need something like this to fix incorrect locale dependence in
> libresolv (bug 19687).
>
> I tested this with the fixes, including one for nscd, which uses
> __libc_C_locobj@GLIBC_PRIVATE.  libresolv uses _nl_C_locobj@GLIBC_PRIVATE.
>
> I'm not entirely sure if we should use the function approach everywhere
> instead.  But as far as I can tell, this patch does not lead to the
> allocation of space for a _nl_C_locobj copy in libresolv (which we would
> not want because it is wasteful).
>
> An alternative would be to add variants of functions such as those in <ctype.h> and
> strcasecmp/strncasecmp which are always tied to the C locale.  This
> would be more efficient because access to the locale object is easier,
> or the variants can even be optimized inline (such as isblank).

This patch implements the alternative approach.  With it, it is no longer
necessary to export the C locale object.

I added __isdigit_C and __isxdigit_C implementations, although maybe 
they are not required because isdigit and isxdigit can be changed.

Beyond the new test, I checked that the approach works by converting a 
few configuration file parsers, and fixing the domain name comparisons 
in libresolv.

Florian
diff mbox

Patch

Implement character classification and conversion tied to the C locale

2016-09-19  Florian Weimer  <fweimer@redhat.com>

	Character classification and conversion tied to the C locale.
	* ctype/C-locale.h: New file.
	* string/strcasecmp_C.c: Likewise.
	* string/strncasecmp_C.c: Likewise.
	* string/Makefile (routines): Add strcasecmp_C, strncasecmp_C.
	* string/Versions (GLIBC_PRIVATE): Add __strcasecmp_C,
	__strncasecmp_C.

2016-09-19  Florian Weimer  <fweimer@redhat.com>

	* tst-C-locale.c (get_classification_ctype): New test.
	* Makefile (tests): Add it.

diff --git a/ctype/C-locale.h b/ctype/C-locale.h
new file mode 100644
index 0000000..ba41a10
--- /dev/null
+++ b/ctype/C-locale.h
@@ -0,0 +1,111 @@ 
+/* Character classification and string comparison in the C locale.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _C_LOCALE_H
+#define _C_LOCALE_H 1
+
+#include <stddef.h>
+
+static inline int
+__isalnum_C (int ch)
+{
+  return ('0' <= ch && ch <= '9')
+    || ('A' <= ch && ch <= 'Z')
+    || ('a' <= ch && ch <= 'z');
+}
+
+static inline int
+__isalpha_C (int ch)
+{
+  return ('A' <= ch && ch <= 'Z')
+    || ('a' <= ch && ch <= 'z');
+}
+
+static inline int
+__isblank_C (int ch)
+{
+  return ch == ' ' || ch == '\t';
+}
+
+static inline int
+__isdigit_C (int ch)
+{
+  return '0' <= ch && ch <= '9';
+}
+
+static inline int
+__islower_C (int ch)
+{
+  return 'a' <= ch && ch <= 'z';
+}
+
+static inline int
+__isspace_C (int ch)
+{
+  return ch == ' '
+    || ch == '\f'
+    || ch == '\n'
+    || ch == '\r'
+    || ch == '\t'
+    || ch == '\v';
+}
+
+static inline int
+__isupper_C (int ch)
+{
+  return 'A' <= ch && ch <= 'Z';
+}
+
+static inline int
+__isxdigit_C (int ch)
+{
+  return ('0' <= ch && ch <= '9')
+    || ('A' <= ch && ch <= 'F')
+    || ('a' <= ch && ch <= 'f');
+}
+
+static inline int
+__tolower_C (int ch)
+{
+  if ('A' <= ch && ch <= 'Z')
+    return ch - 'A' + 'a';
+  /* -1 (presumably EOF) is returned unchanged, as the standard requires.  */
+  if (ch == -1)
+    return ch;
+  /* Keep only the low 8 bits; intended to match tolower behavior.  */
+  return ch & 0xff;
+}
+
+static inline int
+__toupper_C (int ch)
+{
+  if ('a' <= ch && ch <= 'z')
+    return ch - 'a' + 'A';
+  /* -1 (presumably EOF) is returned unchanged, as the standard requires.  */
+  if (ch == -1)
+    return ch;
+  /* Keep only the low 8 bits; intended to match toupper behavior.  */
+  return ch & 0xff;
+}
+
+int __strcasecmp_C (const char *, const char *);
+libc_hidden_proto (__strcasecmp_C)
+int __strncasecmp_C (const char *, const char *, size_t);
+libc_hidden_proto (__strncasecmp_C)
+
+#endif /* _C_LOCALE_H */
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index f5c58ae..a9354fe 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,3 +1,8 @@ 
+2016-09-19  Florian Weimer  <fweimer@redhat.com>
+
+	* tst-C-locale.c (get_classification_ctype): New test.
+	* Makefile (tests): Add it.
+
 2016-08-24  Ernestas Kulik  <ernestas.kulik@gmail.com>
 
 	[BZ #20497]
diff --git a/localedata/Makefile b/localedata/Makefile
index 4ecb192..21565d9 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -75,7 +75,7 @@  tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
 	tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \
 	tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
 	tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \
-	tst-wctype
+	tst-wctype tst-C-locale
 tests-static = bug-setlocale1-static
 tests += $(tests-static)
 ifeq (yes,$(build-shared))
diff --git a/localedata/tst-C-locale.c b/localedata/tst-C-locale.c
new file mode 100644
index 0000000..2408abb
--- /dev/null
+++ b/localedata/tst-C-locale.c
@@ -0,0 +1,208 @@ 
+/* Tests for character classification and string comparison in the C locale.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ctype/C-locale.h>
+#include <ctype.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+struct classification
+{
+  bool alnum;
+  bool alpha;
+  bool blank;
+  bool digit;
+  bool lower;
+  bool space;
+  bool upper;
+  bool xdigit;
+  int as_lower;
+  int as_upper;
+};
+
+#define STORE_RESULT(name) result[ch + 128].name = MANGLE (name) (ch)
+#define GET_CLASSIFICATION             \
+  for (int ch = -128; ch < 256; ++ch)  \
+    {                                  \
+      STORE_RESULT (alnum);            \
+      STORE_RESULT (alpha);            \
+      STORE_RESULT (blank);            \
+      STORE_RESULT (digit);            \
+      STORE_RESULT (lower);            \
+      STORE_RESULT (space);            \
+      STORE_RESULT (upper);            \
+      STORE_RESULT (xdigit);           \
+    }
+
+static void
+get_classification_ctype (struct classification *result)
+{
+#define MANGLE(name) is##name
+  GET_CLASSIFICATION
+#undef MANGLE
+  for (int ch = -128; ch < 256; ++ch)
+    {
+      result[ch + 128].as_lower = tolower (ch);
+      result[ch + 128].as_upper = toupper (ch);
+    }
+}
+
+static void
+get_classification_C_locale (struct classification *result)
+{
+#define MANGLE(name) __is##name##_C
+  GET_CLASSIFICATION
+#undef MANGLE
+  for (int ch = -128; ch < 256; ++ch)
+    {
+      result[ch + 128].as_lower = __tolower_C (ch);
+      result[ch + 128].as_upper = __toupper_C (ch);
+    }
+}
+
+#undef GET_CLASSIFICATION
+#undef STORE_RESULT
+
+static bool errors;
+
+static void
+check_classification_1 (const char *context, int ch, const char *category,
+                        bool expected, bool actual)
+{
+  if (actual != expected)
+    {
+      printf ("error: %s: __is%s_C (%d): expected: %d actual: %d\n",
+              context, category, ch, expected, actual);
+      errors = true;
+    }
+}
+
+static void
+check_caseconv_1 (const char *context, int ch, const char *op,
+                  int expected, int actual)
+{
+  if (actual != expected)
+    {
+      printf ("error: %s: __%s_C (%d): expected: %d actual: %d\n",
+              context, op, ch, expected, actual);
+      errors = true;
+    }
+}
+
+static void
+check_classification (const char *context,
+                      struct classification *expected,
+                      struct classification *actual)
+{
+  for (int ch = -128; ch < 256; ++ch)
+    {
+#define CHECK(name)                                     \
+      check_classification_1 (context, ch, #name,       \
+                              expected[128 + ch].name,  \
+                              actual[128 + ch].name)
+      CHECK (alnum);
+      CHECK (alpha);
+      CHECK (blank);
+      CHECK (digit);
+      CHECK (lower);
+      CHECK (space);
+      CHECK (upper);
+      CHECK (xdigit);
+#undef CHECK
+      check_caseconv_1 (context, ch, "tolower",
+                        expected[128 + ch].as_lower,
+                        actual[128 + ch].as_lower);
+      check_caseconv_1 (context, ch, "toupper",
+                        expected[128 + ch].as_upper,
+                        actual[128 + ch].as_upper);
+    }
+}
+
+static int
+sign (int n)
+{
+  if (n < 0)
+    return -1;
+  else if (n > 0)
+    return 1;
+  return 0;
+}
+
+static void
+get_casecmp (signed char *result, int cmp (const char *, const char *))
+{
+  char left[2] = { 0, 0 };
+  char right[2] = { 0, 0 };
+  for (int l = 0; l < 256; ++l)
+    {
+      left[0] = l;
+      for (int r = 0; r < 256; ++r)
+        {
+          right[0] = r;
+          result[l * 256 + r] = sign (cmp (left, right));
+        }
+    }
+}
+
+static int
+do_test (void)
+{
+  /* Tests while running under the C locale.  */
+  struct classification class_expected[128 + 256];
+  get_classification_ctype (class_expected);
+  struct classification class_before[128 + 256];
+  get_classification_C_locale (class_before);
+  check_classification ("before setlocale", class_expected, class_before);
+
+  signed char cmp_expected[256 * 256];
+  get_casecmp (cmp_expected, strcasecmp);
+  signed char cmp_before[256 * 256];
+  get_casecmp (cmp_before, __strcasecmp_C);
+  if (memcmp (cmp_before, cmp_expected, sizeof (cmp_before)) != 0)
+    {
+      printf ("error: strcasecmp_C incorrect before setlocale\n");
+      return 1;
+    }
+
+  /* Switch locale.  */
+  if (setlocale (LC_ALL, "en_US.ISO-8859-1") == NULL)
+    {
+      printf ("error: setlocale: %m\n");
+      return 1;
+    }
+
+  /* Tests while running under a non-C locale.  */
+  struct classification class_after[128 + 256];
+  get_classification_C_locale (class_after);
+  check_classification ("after setlocale", class_expected, class_after);
+
+  signed char cmp_after[256 * 256];
+  get_casecmp (cmp_after, __strcasecmp_C);
+  if (memcmp (cmp_after, cmp_expected, sizeof (cmp_after)) != 0)
+    {
+      printf ("error: strcasecmp_C incorrect after setlocale\n");
+      return 1;
+    }
+
+  return errors;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/string/Makefile b/string/Makefile
index 69d3f80..d021805 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -41,7 +41,8 @@  routines	:= strcat strchr strcmp strcoll strcpy strcspn		\
 				     addsep replace)			\
 		   envz basename					\
 		   strcoll_l strxfrm_l string-inlines memrchr		\
-		   xpg-strerror strerror_l
+		   xpg-strerror strerror_l 				\
+		   strcasecmp_C strncasecmp_C
 
 strop-tests	:= memchr memcmp memcpy memmove mempcpy memset memccpy	\
 		   stpcpy stpncpy strcat strchr strcmp strcpy strcspn	\
diff --git a/string/Versions b/string/Versions
index 475c1fd..17d4007 100644
--- a/string/Versions
+++ b/string/Versions
@@ -82,4 +82,10 @@  libc {
   }
   GLIBC_2.24 {
   }
+
+  GLIBC_PRIVATE {
+    # Used from other libraries.
+    __strcasecmp_C;
+    __strncasecmp_C;
+  }
 }
diff --git a/string/strcasecmp_C.c b/string/strcasecmp_C.c
new file mode 100644
index 0000000..b99fb52
--- /dev/null
+++ b/string/strcasecmp_C.c
@@ -0,0 +1,30 @@ 
+/* strcasecmp tied to the C locale.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ctype/C-locale.h>
+#include <locale.h>
+#include <string.h>
+
+/* Variant of strcasecmp which always uses the C locale.  */
+int
+__strcasecmp_C (const char *left, const char *right)
+{
+  return __strcasecmp_l (left, right, _nl_C_locobj_ptr);
+}
+
+libc_hidden_def (__strcasecmp_C)
diff --git a/string/strncasecmp_C.c b/string/strncasecmp_C.c
new file mode 100644
index 0000000..08c1516
--- /dev/null
+++ b/string/strncasecmp_C.c
@@ -0,0 +1,30 @@ 
+/* strncasecmp tied to the C locale.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ctype/C-locale.h>
+#include <locale.h>
+#include <string.h>
+
+/* Variant of strncasecmp which always uses the C locale.  */
+int
+__strncasecmp_C (const char *left, const char *right, size_t length)
+{
+  return __strncasecmp_l (left, right, length, _nl_C_locobj_ptr);
+}
+
+libc_hidden_def (__strncasecmp_C)