Implement character classification and conversion tied to the C locale
2016-09-19 Florian Weimer <fweimer@redhat.com>
Character classification and conversion tied to the C locale.
* ctype/C-locale.h: New file.
* string/strcasecmp_C.c: Likewise.
* string/strncasecmp_C.c: Likewise.
* string/Makefile (routines): Add strcasecmp_C, strncasecmp_C.
* string/Versions (GLIBC_PRIVATE): Add __strcasecmp_C,
__strncasecmp_C.
2016-09-19 Florian Weimer <fweimer@redhat.com>
* tst-C-locale.c (get_classification_ctype): New test.
* Makefile (tests): Add it.
new file mode 100644
@@ -0,0 +1,111 @@
+/* Character classification and string comparison in the C locale.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _C_LOCALE_H
+#define _C_LOCALE_H 1
+
+#include <stddef.h>
+
+static inline int /* 1 if CH is in '0'-'9', 'A'-'Z' or 'a'-'z', else 0. */
+__isalnum_C (int ch)
+{
+ return ('0' <= ch && ch <= '9')
+ || ('A' <= ch && ch <= 'Z')
+ || ('a' <= ch && ch <= 'z');
+}
+
+static inline int /* 1 if CH is in 'A'-'Z' or 'a'-'z', else 0. */
+__isalpha_C (int ch)
+{
+ return ('A' <= ch && ch <= 'Z')
+ || ('a' <= ch && ch <= 'z');
+}
+
+static inline int /* 1 if CH is a space or a horizontal tab, else 0. */
+__isblank_C (int ch)
+{
+ return ch == ' ' || ch == '\t';
+}
+
+static inline int /* 1 if CH is in '0'-'9', else 0. */
+__isdigit_C (int ch)
+{
+ return '0' <= ch && ch <= '9';
+}
+
+static inline int /* 1 if CH is in 'a'-'z', else 0. */
+__islower_C (int ch)
+{
+ return 'a' <= ch && ch <= 'z';
+}
+
+static inline int /* 1 if CH is one of the six characters below, else 0. */
+__isspace_C (int ch)
+{
+ return ch == ' '
+ || ch == '\f'
+ || ch == '\n'
+ || ch == '\r'
+ || ch == '\t'
+ || ch == '\v';
+}
+
+static inline int /* 1 if CH is in 'A'-'Z', else 0. */
+__isupper_C (int ch)
+{
+ return 'A' <= ch && ch <= 'Z';
+}
+
+static inline int /* 1 if CH is in '0'-'9', 'A'-'F' or 'a'-'f', else 0. */
+__isxdigit_C (int ch)
+{
+ return ('0' <= ch && ch <= '9')
+ || ('A' <= ch && ch <= 'F')
+ || ('a' <= ch && ch <= 'f');
+}
+
+static inline int /* Map 'A'-'Z' to 'a'-'z'; other values are handled below. */
+__tolower_C (int ch)
+{
+ if ('A' <= ch && ch <= 'Z')
+ return ch - 'A' + 'a';
+ /* Required by the standard: -1 (presumably EOF) is returned unchanged. */
+ if (ch == -1)
+ return ch;
+ /* Keep only the low 8 bits; intended to match tolower for negative inputs (tst-C-locale.c compares the two over -128..255). */
+ return ch & 0xff;
+}
+
+static inline int /* Map 'a'-'z' to 'A'-'Z'; other values are handled below. */
+__toupper_C (int ch)
+{
+ if ('a' <= ch && ch <= 'z')
+ return ch - 'a' + 'A';
+ /* Required by the standard: -1 (presumably EOF) is returned unchanged. */
+ if (ch == -1)
+ return ch;
+ /* Keep only the low 8 bits; intended to match toupper for negative inputs (tst-C-locale.c compares the two over -128..255). */
+ return ch & 0xff;
+}
+
+int __strcasecmp_C (const char *, const char *); /* Defined in string/strcasecmp_C.c. */
+libc_hidden_proto (__strcasecmp_C)
+int __strncasecmp_C (const char *, const char *, size_t); /* Defined in string/strncasecmp_C.c. */
+libc_hidden_proto (__strncasecmp_C)
+
+#endif /* _C_LOCALE_H */
@@ -1,3 +1,8 @@
+2016-09-19 Florian Weimer <fweimer@redhat.com>
+
+ * tst-C-locale.c (get_classification_ctype): New test.
+ * Makefile (tests): Add it.
+
2016-08-24 Ernestas Kulik <ernestas.kulik@gmail.com>
[BZ #20497]
@@ -75,7 +75,7 @@ tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \
tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \
- tst-wctype
+ tst-wctype tst-C-locale
tests-static = bug-setlocale1-static
tests += $(tests-static)
ifeq (yes,$(build-shared))
new file mode 100644
@@ -0,0 +1,208 @@
+/* Tests for character classification and string comparison in the C locale.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <ctype/C-locale.h>
+#include <ctype.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+struct classification /* Recorded results for a single character value. */
+{
+ bool alnum; /* Each flag holds the result of the matching is* predicate. */
+ bool alpha;
+ bool blank;
+ bool digit;
+ bool lower;
+ bool space;
+ bool upper;
+ bool xdigit;
+ int as_lower; /* Result of the tolower-style conversion. */
+ int as_upper; /* Result of the toupper-style conversion. */
+};
+
+#define STORE_RESULT(name) result[ch + 128].name = MANGLE (name) (ch) /* Index offset by 128, so ch == -128 lands in slot 0. */
+#define GET_CLASSIFICATION /* Needs MANGLE defined and `result' in scope. */ \
+ for (int ch = -128; ch < 256; ++ch) \
+ { \
+ STORE_RESULT (alnum); \
+ STORE_RESULT (alpha); \
+ STORE_RESULT (blank); \
+ STORE_RESULT (digit); \
+ STORE_RESULT (lower); \
+ STORE_RESULT (space); \
+ STORE_RESULT (upper); \
+ STORE_RESULT (xdigit); \
+ }
+
+static void /* Fill RESULT[ch + 128] using the <ctype.h> functions, for ch in [-128, 256). */
+get_classification_ctype (struct classification *result)
+{
+#define MANGLE(name) is##name /* Expands to isalnum, isalpha, and so on. */
+ GET_CLASSIFICATION
+#undef MANGLE
+ for (int ch = -128; ch < 256; ++ch)
+ {
+ result[ch + 128].as_lower = tolower (ch);
+ result[ch + 128].as_upper = toupper (ch);
+ }
+}
+
+static void /* Fill RESULT[ch + 128] using the C-locale.h inlines, for ch in [-128, 256). */
+get_classification_C_locale (struct classification *result)
+{
+#define MANGLE(name) __is##name##_C /* Expands to __isalnum_C, __isalpha_C, and so on. */
+ GET_CLASSIFICATION
+#undef MANGLE
+ for (int ch = -128; ch < 256; ++ch)
+ {
+ result[ch + 128].as_lower = __tolower_C (ch);
+ result[ch + 128].as_upper = __toupper_C (ch);
+ }
+}
+
+#undef GET_CLASSIFICATION
+#undef STORE_RESULT
+
+static bool errors; /* Set to true by the check_*_1 helpers on any mismatch; returned by do_test. */
+
+static void /* Print an error and set `errors' if ACTUAL differs from EXPECTED. */
+check_classification_1 (const char *context, int ch, const char *category,
+ bool expected, bool actual)
+{
+ if (actual != expected)
+ {
+ printf ("error: %s: __is%s_C (%d): expected: %d actual: %d\n",
+ context, category, ch, expected, actual);
+ errors = true;
+ }
+}
+
+static void /* Like check_classification_1, but for int conversion results; OP names the function. */
+check_caseconv_1 (const char *context, int ch, const char *op,
+ int expected, int actual)
+{
+ if (actual != expected)
+ {
+ printf ("error: %s: __%s_C (%d): expected: %d actual: %d\n",
+ context, op, ch, expected, actual);
+ errors = true;
+ }
+}
+
+static void /* Compare every field of EXPECTED and ACTUAL for ch in [-128, 256). */
+check_classification (const char *context,
+ struct classification *expected,
+ struct classification *actual)
+{
+ for (int ch = -128; ch < 256; ++ch)
+ {
+#define CHECK(name) \
+ check_classification_1 (context, ch, #name, \
+ expected[128 + ch].name, \
+ actual[128 + ch].name)
+ CHECK (alnum);
+ CHECK (alpha);
+ CHECK (blank);
+ CHECK (digit);
+ CHECK (lower);
+ CHECK (space);
+ CHECK (upper);
+ CHECK (xdigit);
+#undef CHECK
+ check_caseconv_1 (context, ch, "tolower",
+ expected[128 + ch].as_lower,
+ actual[128 + ch].as_lower);
+ check_caseconv_1 (context, ch, "toupper",
+ expected[128 + ch].as_upper,
+ actual[128 + ch].as_upper);
+ }
+}
+
+static int /* Reduce N to -1, 0 or 1 according to its sign. */
+sign (int n)
+{
+ if (n < 0)
+ return -1;
+ else if (n > 0)
+ return 1;
+ return 0;
+}
+
+static void /* Store sign (CMP (l, r)) at RESULT[l * 256 + r] for all byte pairs. */
+get_casecmp (signed char *result, int cmp (const char *, const char *))
+{
+ char left[2] = { 0, 0 }; /* One-byte strings; byte 0 gives the empty string. */
+ char right[2] = { 0, 0 };
+ for (int l = 0; l < 256; ++l)
+ {
+ left[0] = l;
+ for (int r = 0; r < 256; ++r)
+ {
+ right[0] = r;
+ result[l * 256 + r] = sign (cmp (left, right));
+ }
+ }
+}
+
+static int /* Returns 0 on success, nonzero if any check failed. */
+do_test (void)
+{
+ /* Tests while running under the C locale. */
+ struct classification class_expected[128 + 256];
+ get_classification_ctype (class_expected); /* Reference values from <ctype.h>. */
+ struct classification class_before[128 + 256];
+ get_classification_C_locale (class_before);
+ check_classification ("before setlocale", class_expected, class_before);
+
+ signed char cmp_expected[256 * 256];
+ get_casecmp (cmp_expected, strcasecmp); /* Reference signs from strcasecmp. */
+ signed char cmp_before[256 * 256];
+ get_casecmp (cmp_before, __strcasecmp_C);
+ if (memcmp (cmp_before, cmp_expected, sizeof (cmp_before)) != 0)
+ {
+ printf ("error: strcasecmp_C incorrect before setlocale\n");
+ return 1;
+ }
+
+ /* Switch locale. */
+ if (setlocale (LC_ALL, "en_US.ISO-8859-1") == NULL)
+ {
+ printf ("error: setlocale: %m\n");
+ return 1;
+ }
+
+ /* Tests while running under a non-C locale; the expected values are still those recorded above, before the switch. */
+ struct classification class_after[128 + 256];
+ get_classification_C_locale (class_after);
+ check_classification ("after setlocale", class_expected, class_after);
+
+ signed char cmp_after[256 * 256];
+ get_casecmp (cmp_after, __strcasecmp_C);
+ if (memcmp (cmp_after, cmp_expected, sizeof (cmp_after)) != 0)
+ {
+ printf ("error: strcasecmp_C incorrect after setlocale\n");
+ return 1;
+ }
+
+ return errors; /* True if any classification or conversion check failed. */
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
@@ -41,7 +41,8 @@ routines := strcat strchr strcmp strcoll strcpy strcspn \
addsep replace) \
envz basename \
strcoll_l strxfrm_l string-inlines memrchr \
- xpg-strerror strerror_l
+ xpg-strerror strerror_l \
+ strcasecmp_C strncasecmp_C
strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \
stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
@@ -82,4 +82,10 @@ libc {
}
GLIBC_2.24 {
}
+
+ GLIBC_PRIVATE {
+ # Used from other libraries.
+ __strcasecmp_C;
+ __strncasecmp_C;
+ }
}
new file mode 100644
@@ -0,0 +1,30 @@
+/* strcasecmp tied to the C locale.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <ctype/C-locale.h>
+#include <locale.h>
+#include <string.h>
+
+/* Variant of strcasecmp which always uses the C locale: forwards to __strcasecmp_l with _nl_C_locobj_ptr instead of the current locale. */
+int
+__strcasecmp_C (const char *left, const char *right)
+{
+ return __strcasecmp_l (left, right, _nl_C_locobj_ptr);
+}
+
+libc_hidden_def (__strcasecmp_C)
new file mode 100644
@@ -0,0 +1,30 @@
+/* strncasecmp tied to the C locale.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <ctype/C-locale.h>
+#include <locale.h>
+#include <string.h>
+
+/* Variant of strncasecmp which always uses the C locale: forwards to __strncasecmp_l with _nl_C_locobj_ptr instead of the current locale. */
+int
+__strncasecmp_C (const char *left, const char *right, size_t length)
+{
+ return __strncasecmp_l (left, right, length, _nl_C_locobj_ptr);
+}
+
+libc_hidden_def (__strncasecmp_C)