@@ -30,6 +30,7 @@
# define USTRING_TYPE unsigned char
# define STRCOLL __strcoll_l
# define STRDIFF __strdiff
+# define STRDIFF_L __strdiff_l
# define STRCMP strcmp
# define WEIGHT_H "../locale/weight.h"
# define SUFFIX MB
@@ -42,19 +43,7 @@
#include "../locale/localeinfo.h"
#include WEIGHT_H
-#define MASK_UTF8_7BIT (1 << 7)
-#define MASK_UTF8_START (3 << 6)
-
-size_t
-STRDIFF (const STRING_TYPE *s, const STRING_TYPE *t)
-{
- size_t n;
-
- for (n = 0; *s != '\0' && *s++ == *t++; ++n)
- continue;
-
- return n;
-}
+#include "string/strdiff.h"
/* Track status while looking for sequences in a string. */
typedef struct
@@ -274,24 +263,14 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
if (nrules == 0)
return STRCMP (s1, s2);
- /* Fast forward to the position of the first difference. Needs to be
- encoding aware as the byte-by-byte comparison can stop in the middle
- of a char sequence for multibyte encodings like UTF-8. */
+ /* Fast forward to the position of the first difference. */
uint_fast32_t encoding =
current->values[_NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE)].word;
- if (encoding != __cet_other)
- {
- size_t diff = STRDIFF (s1, s2);
- if (diff > 0)
- {
- if (encoding == __cet_utf8 && (*(s1 + diff) & MASK_UTF8_7BIT) != 0)
- do
- diff--;
- while (diff > 0 && (*(s1 + diff) & MASK_UTF8_START) != MASK_UTF8_START);
- s1 += diff;
- s2 += diff;
- }
- }
+
+ if (sizeof (STRING_TYPE) > 1)
+ STRDIFF_L (&s1, &s2, __cet_8bit);
+ else if (encoding != __cet_other)
+ STRDIFF_L (&s1, &s2, encoding);
/* Catch empty strings. */
if (__glibc_unlikely (*s1 == '\0') || __glibc_unlikely (*s2 == '\0'))
new file mode 100644
@@ -0,0 +1,48 @@
+/* Return the length of the common prefix of S and T: the number of
+   leading elements that are equal in both strings, not counting the
+   terminating null.  */
+static size_t
+STRDIFF (const STRING_TYPE *s, const STRING_TYPE *t)
+{
+  size_t n = 0;
+
+  while (s[n] != '\0' && s[n] == t[n])
+    ++n;
+
+  return n;
+}
+
+/* UTF-8 continuation bytes have the bit pattern 10xxxxxx, i.e. they lie
+   in the range 0x80..0xBF.  Bytes from 0xC0 upwards are lead bytes.  */
+#define UTF8_CONT_START 0x80
+#define UTF8_CONT_END 0xBF
+
+/* Advance *S1 and *S2 past their common prefix.  If ENCODING is
+   __cet_utf8 and the first difference lies on a continuation byte, step
+   back to the nearest preceding byte that is not a continuation byte so
+   that both pointers end up at the start of a character.  For any other
+   ENCODING the pointers are advanced by the plain prefix length.  */
+static void
+STRDIFF_L (const STRING_TYPE **s1, const STRING_TYPE **s2,
+           uint_fast32_t encoding)
+{
+  size_t diff = STRDIFF (*s1, *s2);
+
+  if (encoding == __cet_utf8)
+    {
+      /* Only *S1 is inspected: every byte before DIFF is shared by both
+         strings.  Well-formed UTF-8 needs at most three steps here; looping
+         until a non-continuation byte is found also keeps malformed input
+         from leaving the pointers in the middle of a sequence.  */
+      while (diff > 0)
+        {
+          USTRING_TYPE c = (*s1)[diff];
+          if (c < UTF8_CONT_START || c > UTF8_CONT_END)
+            break;
+          --diff;
+        }
+    }
+
+  *s1 += diff;
+  *s2 += diff;
+}