Message ID | 1420973708-4447-1-git-send-email-leonhard.holz@web.de |
---|---|
State | New |
Headers | show |
Thanks, this looks good to me now. I'll commit this once Carlos confirms that I can. Siddhesh On Sun, 11 Jan 2015 11:55:08 +0100 Leonhard Holz <leonhard.holz@web.de> wrote: > 2015-01-11 Leonhard Holz <leonhard.holz@web.de> > > [BZ #16009] > * string/strxfrm_l.c (STRXFRM): Allocate fixed size cache for > weights and rules. Use do_xfrm_cached if data fits in cache, do_xfrm > otherwise. Moved former main loop to... > * (do_xfrm_cached): New function. > * (do_xfrm): Non-caching version of do_xfrm_cached. Uses > find_idx, find_position and stack_push. > * (find_idx): New function. > * (find_position): Likewise. > * localedata/sort-test.sh: Added test run for do_xfrm. > * localedata/xfrm-test.c (main): Added command line option > -nocache to run the test with strings that are too large for the > STRXFRM cache. > > --- > localedata/sort-test.sh | 7 + > localedata/xfrm-test.c | 52 +++++- > string/strxfrm_l.c | 488 > ++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 447 > insertions(+), 100 deletions(-) > > diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh > index dc23268..1a10262 100644 > --- a/localedata/sort-test.sh > +++ b/localedata/sort-test.sh > @@ -53,11 +53,18 @@ for l in $lang; do > ${common_objpfx}localedata/xfrm-test $id < $cns.in \ > > ${common_objpfx}localedata/$cns.xout || here=1 > cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1 > + ${test_program_prefix_before_env} \ > + ${run_program_env} \ > + LC_ALL=$l ${test_program_prefix_after_env} \ > + ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \ > + > ${common_objpfx}localedata/$cns.nocache.xout || here=1 > + cmp -s $cns.in ${common_objpfx}localedata/$cns.nocache.xout || > here=1 if test $here -eq 0; then > echo "$l xfrm-test OK" > else > echo "$l xfrm-test FAIL" > diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed > 's/^/ /' > + diff -u $cns.in ${common_objpfx}localedata/$cns.nocache.xout | > sed 's/^/ /' status=1 > fi > done > diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c > index 79494ba..3ab2140 100644 > --- a/localedata/xfrm-test.c > +++ b/localedata/xfrm-test.c > @@ -23,7 +23,10 @@ > #include <stdio.h> > #include <stdlib.h> > #include <string.h> > +#include <stdbool.h> > > +/* Keep in sync with string/strxfrm_l.c. */ > +#define SMALL_STR_SIZE 4095 > > struct lines > { > @@ -37,6 +40,7 @@ int > main (int argc, char *argv[]) > { > int result = 0; > + bool nocache = false; > size_t nstrings, nstrings_max; > struct lines *strings; > char *line = NULL; > @@ -44,7 +48,18 @@ main (int argc, char *argv[]) > size_t n; > > if (argc < 2) > - error (1, 0, "usage: %s <random seed>", argv[0]); > + error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]); > + > + if (argc == 3) > + { > + if (strcmp (argv[2], "-nocache") == 0) > + nocache = true; > + else > + { > + printf ("Unknown option %s!\n", argv[2]); > + exit (1); > + } > + } > > setlocale (LC_ALL, ""); > > @@ -59,9 +74,9 @@ main (int argc, char *argv[]) > > while (1) > { > - char saved, *newp; > - int needed; > - int l; > + char saved, *word, *newp; > + size_t l, line_len, needed; > + > if (getline (&line, &len, stdin) < 0) > break; > > @@ -83,10 +98,35 @@ main (int argc, char *argv[]) > > saved = line[l]; > line[l] = '\0'; > - needed = strxfrm (NULL, line, 0); > + > + if (nocache) > + { > + line_len = strlen (line); > + word = malloc (line_len + SMALL_STR_SIZE + 1); > + if (word == NULL) > + { > + printf ("malloc failed: %m\n"); > + exit (1); > + } > + memset (word, ' ', SMALL_STR_SIZE); > + memcpy (word + SMALL_STR_SIZE, line, line_len); > + word[line_len + SMALL_STR_SIZE] = '\0'; > + } > + else > + word = line; > + > + needed = strxfrm (NULL, word, 0); > newp = malloc (needed + 1); > - strxfrm (newp, line, needed + 1); > + if (newp == NULL) > + { > + printf ("malloc failed: %m\n"); > + exit (1); > + } > + strxfrm (newp, word, needed + 1); > strings[nstrings].xfrm = newp; > + > + if (nocache) > + free (word); > line[l] = saved; > ++nstrings; > } > diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c > index 9849447..921d1f7 100644 > --- a/string/strxfrm_l.c > +++ b/string/strxfrm_l.c > @@ -40,9 +40,24 @@ > #define CONCAT(a,b) CONCAT1(a,b) > #define CONCAT1(a,b) a##b > > +/* Maximum string size that is calculated with cached indices. > Right now this > + is an arbitrary value open to optimizations. SMALL_STR_SIZE * 4 > has to be > + lower than __MAX_ALLOCA_CUTOFF. Keep localedata/xfrm-test.c in > sync. */ +#define SMALL_STR_SIZE 4095 > + > #include "../locale/localeinfo.h" > #include WEIGHT_H > > +/* Group locale data for shorter parameter lists. */ > +typedef struct > +{ > + uint_fast32_t nrules; > + unsigned char *rulesets; > + USTRING_TYPE *weights; > + int32_t *table; > + USTRING_TYPE *extra; > + int32_t *indirect; > +} locale_data_t; > > #ifndef WIDE_CHAR_VERSION > > @@ -81,113 +96,325 @@ utf8_encode (char *buf, int val) > } > #endif > > +/* Find next weight and rule index. Inlined since called for every > char. */ +static __always_inline size_t > +find_idx (const USTRING_TYPE **us, int32_t *weight_idx, > + unsigned char *rule_idx, const locale_data_t *l_data, > const int pass) +{ > + int32_t tmp = findidx (l_data->table, l_data->indirect, > l_data->extra, us, > + -1); > + *rule_idx = tmp >> 24; > + int32_t idx = tmp & 0xffffff; > + size_t len = l_data->weights[idx++]; > + > + /* Skip over indices of previous levels. */ > + for (int i = 0; i < pass; i++) > + { > + idx += len; > + len = l_data->weights[idx++]; > + } > > -size_t > -STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, > __locale_t l) > + *weight_idx = idx; > + return len; > +} > + > +static int > +find_position (const USTRING_TYPE *us, const locale_data_t *l_data, > + const int pass) > { > - struct __locale_data *current = l->__locales[LC_COLLATE]; > - uint_fast32_t nrules = current->values[_NL_ITEM_INDEX > (_NL_COLLATE_NRULES)].word; > - /* We don't assign the following values right away since it might > be > - unnecessary in case there are no rules. */ > - const unsigned char *rulesets; > - const int32_t *table; > - const USTRING_TYPE *weights; > - const USTRING_TYPE *extra; > - const int32_t *indirect; > + int32_t weight_idx; > + unsigned char rule_idx; > + const USTRING_TYPE *usrc = us; > + > + find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass); > + return l_data->rulesets[rule_idx * l_data->nrules + pass] & > sort_position; +} > + > +/* Do the transformation. */ > +static size_t > +do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n, > + const locale_data_t *l_data) > +{ > + int32_t weight_idx; > + unsigned char rule_idx; > uint_fast32_t pass; > - size_t needed; > + size_t needed = 0; > size_t last_needed; > - const USTRING_TYPE *usrc; > - size_t srclen = STRLEN (src); > - int32_t *idxarr; > - unsigned char *rulearr; > - size_t idxmax; > - size_t idxcnt; > - int use_malloc; > > - if (nrules == 0) > + /* Now the passes over the weights. */ > + for (pass = 0; pass < l_data->nrules; ++pass) > { > - if (n != 0) > - STPNCPY (dest, src, MIN (srclen + 1, n)); > + size_t backw_len = 0; > + last_needed = needed; > + const USTRING_TYPE *cur = usrc; > + const USTRING_TYPE *backw_start = NULL; > > - return srclen; > - } > + /* We assume that if a rule has defined `position' in one > section > + this is true for all of them. */ > + int position = find_position (cur, l_data, pass); > > - rulesets = (const unsigned char *) > - current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; > - table = (const int32_t *) > - current->values[_NL_ITEM_INDEX > (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; > - weights = (const USTRING_TYPE *) > - current->values[_NL_ITEM_INDEX > (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; > - extra = (const USTRING_TYPE *) > - current->values[_NL_ITEM_INDEX > (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; > - indirect = (const int32_t *) > - current->values[_NL_ITEM_INDEX > (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; > - use_malloc = 0; > + if (position == 0) > + { > + while (*cur != L('\0')) > + { > + const USTRING_TYPE *pos = cur; > + size_t len = find_idx (&cur, &weight_idx, &rule_idx, > l_data, > + pass); > + int rule = l_data->rulesets[rule_idx * l_data->nrules > + pass]; > - assert (((uintptr_t) table) % __alignof__ (table[0]) == 0); > - assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0); > - assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0); > - assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0); > + if ((rule & sort_forward) != 0) > + { > + /* Handle the pushed backward sequence. */ > + if (backw_start != NULL) > + { > + for (size_t i = backw_len; i > 0; ) > + { > + int32_t weight_idx; > + unsigned char rule_idx; > + size_t len = find_idx (&backw_start, > &weight_idx, > + &rule_idx, l_data, > pass); > + if (needed + i < n) > + for (size_t j = len; j > 0; j--) > + dest[needed + i - j] = > + l_data->weights[weight_idx++]; > + > + i -= len; > + } > > - /* Handle an empty string as a special case. */ > - if (srclen == 0) > - { > - if (n != 0) > - *dest = L('\0'); > - return 0; > - } > + needed += backw_len; > + backw_start = NULL; > + backw_len = 0; > + } > > - /* We need the elements of the string as unsigned values since they > - are used as indeces. */ > - usrc = (const USTRING_TYPE *) src; > - > - /* Perform the first pass over the string and while doing this find > - and store the weights for each character. Since we want this to > - be as fast as possible we are using `alloca' to store the > temporary > - values. But since there is no limit on the length of the string > - we have to use `malloc' if the string is too long. We should be > - very conservative here. */ > - if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1))) > - { > - idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) > + 1)); > - rulearr = (unsigned char *) &idxarr[srclen]; > - > - if (idxarr == NULL) > - /* No memory. Well, go with the stack then. > - > - XXX Once this implementation is stable we will handle this > - differently. Instead of precomputing the indeces we will > - do this in time. This means, though, that this happens > for > - every pass again. */ > - goto try_stack; > - use_malloc = 1; > - } > - else > - { > - try_stack: > - idxarr = (int32_t *) alloca (srclen * sizeof (int32_t)); > - rulearr = (unsigned char *) alloca (srclen + 1); > + /* Now handle the forward element. */ > + if (needed + len < n) > + while (len-- > 0) > + dest[needed++] = l_data->weights[weight_idx++]; > + else > + /* No more characters fit into the buffer. */ > + needed += len; > + } > + else > + { > + /* Remember start of the backward sequence & track > length. */ > + if (backw_start == NULL) > + backw_start = pos; > + backw_len += len; > + } > + } > + > + > + /* Handle the pushed backward sequence. */ > + if (backw_start != NULL) > + { > + for (size_t i = backw_len; i > 0; ) > + { > + size_t len = find_idx (&backw_start, &weight_idx, > &rule_idx, > + l_data, pass); > + if (needed + i < n) > + for (size_t j = len; j > 0; j--) > + dest[needed + i - j] = > + l_data->weights[weight_idx++]; > + > + i -= len; > + } > + > + needed += backw_len; > + } > + } > + else > + { > + int val = 1; > +#ifndef WIDE_CHAR_VERSION > + char buf[7]; > + size_t buflen; > +#endif > + size_t i; > + > + while (*cur != L('\0')) > + { > + const USTRING_TYPE *pos = cur; > + size_t len = find_idx (&cur, &weight_idx, &rule_idx, > l_data, > + pass); > + int rule = l_data->rulesets[rule_idx * l_data->nrules > + pass]; + > + if ((rule & sort_forward) != 0) > + { > + /* Handle the pushed backward sequence. */ > + if (backw_start != NULL) > + { > + for (size_t p = backw_len; p > 0; p--) > + { > + size_t len; > + int32_t weight_idx; > + unsigned char rule_idx; > + const USTRING_TYPE *backw_cur = > backw_start; + > + /* To prevent a warning init the used > vars. */ > + len = find_idx (&backw_cur, &weight_idx, > + &rule_idx, l_data, pass); > + > + for (i = 1; i < p; i++) > + len = find_idx (&backw_cur, &weight_idx, > + &rule_idx, l_data, pass); > + > + if (len != 0) > + { > +#ifdef WIDE_CHAR_VERSION > + if (needed + 1 + len < n) > + { > + dest[needed] = val; > + for (i = 0; i < len; ++i) > + dest[needed + 1 + i] = > + l_data->weights[weight_idx + > i]; > + } > + needed += 1 + len; > +#else > + buflen = utf8_encode (buf, val); > + if (needed + buflen + len < n) > + { > + for (i = 0; i < buflen; ++i) > + dest[needed + i] = buf[i]; > + for (i = 0; i < len; ++i) > + dest[needed + buflen + i] = > + l_data->weights[weight_idx + > i]; > + } > + needed += buflen + len; > +#endif > + val = 1; > + } > + else > + ++val; > + } > + > + backw_start = NULL; > + backw_len = 0; > + } > + > + /* Now handle the forward element. */ > + if (len != 0) > + { > +#ifdef WIDE_CHAR_VERSION > + if (needed + 1 + len < n) > + { > + dest[needed] = val; > + for (i = 0; i < len; ++i) > + dest[needed + 1 + i] = > + l_data->weights[weight_idx + i]; > + } > + needed += 1 + len; > +#else > + buflen = utf8_encode (buf, val); > + if (needed + buflen + len < n) > + { > + for (i = 0; i < buflen; ++i) > + dest[needed + i] = buf[i]; > + for (i = 0; i < len; ++i) > + dest[needed + buflen + i] = > + l_data->weights[weight_idx + i]; > + } > + needed += buflen + len; > +#endif > + val = 1; > + } > + else > + ++val; > + } > + else > + { > + /* Remember start of the backward sequence & track > length. */ > + if (backw_start == NULL) > + backw_start = pos; > + backw_len++; > + } > + } > + > + /* Handle the pushed backward sequence. */ > + if (backw_start != NULL) > + { > + for (size_t p = backw_len; p > 0; p--) > + { > + size_t len; > + int32_t weight_idx; > + unsigned char rule_idx; > + const USTRING_TYPE *backw_cur = backw_start; > + > + /* To prevent a warning init the used vars. */ > + len = find_idx (&backw_cur, &weight_idx, > + &rule_idx, l_data, pass); > + > + for (i = 1; i < p; i++) > + len = find_idx (&backw_cur, &weight_idx, > + &rule_idx, l_data, pass); > + > + if (len != 0) > + { > +#ifdef WIDE_CHAR_VERSION > + if (needed + 1 + len < n) > + { > + dest[needed] = val; > + for (i = 0; i < len; ++i) > + dest[needed + 1 + i] = > + l_data->weights[weight_idx + i]; > + } > + needed += 1 + len; > +#else > + buflen = utf8_encode (buf, val); > + if (needed + buflen + len < n) > + { > + for (i = 0; i < buflen; ++i) > + dest[needed + i] = buf[i]; > + for (i = 0; i < len; ++i) > + dest[needed + buflen + i] = > + l_data->weights[weight_idx + i]; > + } > + needed += buflen + len; > +#endif > + val = 1; > + } > + else > + ++val; > + } > + } > + } > + > + /* Finally store the byte to separate the passes or terminate > + the string. */ > + if (needed < n) > + dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0'); > + ++needed; > } > > - idxmax = 0; > - do > + /* This is a little optimization: many collation specifications > have > + a `position' rule at the end and if no non-ignored character > + is found the last \1 byte is immediately followed by a \0 byte > + signalling this. We can avoid the \1 byte(s). */ > + if (needed > 2 && needed == last_needed + 1) > { > - int32_t tmp = findidx (table, indirect, extra, &usrc, -1); > - rulearr[idxmax] = tmp >> 24; > - idxarr[idxmax] = tmp & 0xffffff; > - > - ++idxmax; > + /* Remove the \1 byte. */ > + if (--needed <= n) > + dest[needed - 1] = L('\0'); > } > - while (*usrc != L('\0')); > > - /* This element is only read, the value never used but to determine > - another value which then is ignored. */ > - rulearr[idxmax] = '\0'; > + /* Return the number of bytes/words we need, but don't count the > NUL > + byte/word at the end. */ > + return needed - 1; > +} > + > +/* Do the transformation using weight-index and rule cache. */ > +static size_t > +do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t > *l_data, > + size_t idxmax, int32_t *idxarr, const unsigned char > *rulearr) +{ > + uint_fast32_t nrules = l_data->nrules; > + unsigned char *rulesets = l_data->rulesets; > + USTRING_TYPE *weights = l_data->weights; > + uint_fast32_t pass; > + size_t needed = 0; > + size_t last_needed; > + size_t idxcnt; > > - /* Now the passes over the weights. We now use the indeces we > found > - before. */ > - needed = 0; > + /* Now the passes over the weights. */ > for (pass = 0; pass < nrules; ++pass) > { > size_t backw_stop = ~0ul; > @@ -433,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE > *src, size_t n, __locale_t l) dest[needed - 1] = L('\0'); > } > > - /* Free the memory if needed. */ > - if (use_malloc) > - free (idxarr); > - > /* Return the number of bytes/words we need, but don't count the > NUL byte/word at the end. */ > return needed - 1; > } > + > +size_t > +STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, > __locale_t l) +{ > + locale_data_t l_data; > + struct __locale_data *current = l->__locales[LC_COLLATE]; > + l_data.nrules = current->values[_NL_ITEM_INDEX > (_NL_COLLATE_NRULES)].word; + > + /* Handle byte comparison case. */ > + if (l_data.nrules == 0) > + { > + size_t srclen = STRLEN (src); > + > + if (n != 0) > + STPNCPY (dest, src, MIN (srclen + 1, n)); > + > + return srclen; > + } > + > + /* Handle an empty string, code hereafter relies on strlen (src) > > 0. */ > + if (*src == L('\0')) > + { > + if (n != 0) > + *dest = L('\0'); > + return 0; > + } > + > + /* Get the locale data. */ > + l_data.rulesets = (unsigned char *) > + current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; > + l_data.table = (int32_t *) > + current->values[_NL_ITEM_INDEX > (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; > + l_data.weights = (USTRING_TYPE *) > + current->values[_NL_ITEM_INDEX > (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; > + l_data.extra = (USTRING_TYPE *) > + current->values[_NL_ITEM_INDEX > (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; > + l_data.indirect = (int32_t *) > + current->values[_NL_ITEM_INDEX > (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; + > + assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) > == 0); > + assert (((uintptr_t) l_data.weights) % __alignof__ > (l_data.weights[0]) == 0); > + assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) > == 0); > + assert (((uintptr_t) l_data.indirect) % __alignof__ > (l_data.indirect[0]) == 0); + > + /* We need the elements of the string as unsigned values since they > + are used as indeces. */ > + const USTRING_TYPE *usrc = (const USTRING_TYPE *) src; > + > + /* Allocate cache for small strings on the stack and fill it with > weight and > + rule indices. If the cache size is not sufficient, continue > with the > + uncached xfrm version. */ > + size_t idxmax = 0; > + const USTRING_TYPE *cur = usrc; > + int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t)); > + unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1); > + > + do > + { > + int32_t tmp = findidx (l_data.table, l_data.indirect, > l_data.extra, &cur, > + -1); > + rulearr[idxmax] = tmp >> 24; > + idxarr[idxmax] = tmp & 0xffffff; > + > + ++idxmax; > + } > + while (*cur != L('\0') && idxmax < SMALL_STR_SIZE); > + > + /* This element is only read, the value never used but to determine > + another value which then is ignored. */ > + rulearr[idxmax] = '\0'; > + > + /* Do the transformation. */ > + if (*cur == L('\0')) > + return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, > rulearr); > + else > + return do_xfrm (usrc, dest, n, &l_data); > +} > libc_hidden_def (STRXFRM) > > #ifndef WIDE_CHAR_VERSION
diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh index dc23268..1a10262 100644 --- a/localedata/sort-test.sh +++ b/localedata/sort-test.sh @@ -53,11 +53,18 @@ for l in $lang; do ${common_objpfx}localedata/xfrm-test $id < $cns.in \ > ${common_objpfx}localedata/$cns.xout || here=1 cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1 + ${test_program_prefix_before_env} \ + ${run_program_env} \ + LC_ALL=$l ${test_program_prefix_after_env} \ + ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \ + > ${common_objpfx}localedata/$cns.nocache.xout || here=1 + cmp -s $cns.in ${common_objpfx}localedata/$cns.nocache.xout || here=1 if test $here -eq 0; then echo "$l xfrm-test OK" else echo "$l xfrm-test FAIL" diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/ /' + diff -u $cns.in ${common_objpfx}localedata/$cns.nocache.xout | sed 's/^/ /' status=1 fi done diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c index 79494ba..3ab2140 100644 --- a/localedata/xfrm-test.c +++ b/localedata/xfrm-test.c @@ -23,7 +23,10 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <stdbool.h> +/* Keep in sync with string/strxfrm_l.c. */ +#define SMALL_STR_SIZE 4095 struct lines { @@ -37,6 +40,7 @@ int main (int argc, char *argv[]) { int result = 0; + bool nocache = false; size_t nstrings, nstrings_max; struct lines *strings; char *line = NULL; @@ -44,7 +48,18 @@ main (int argc, char *argv[]) size_t n; if (argc < 2) - error (1, 0, "usage: %s <random seed>", argv[0]); + error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]); + + if (argc == 3) + { + if (strcmp (argv[2], "-nocache") == 0) + nocache = true; + else + { + printf ("Unknown option %s!\n", argv[2]); + exit (1); + } + } setlocale (LC_ALL, ""); @@ -59,9 +74,9 @@ main (int argc, char *argv[]) while (1) { - char saved, *newp; - int needed; - int l; + char saved, *word, *newp; + size_t l, line_len, needed; + if (getline (&line, &len, stdin) < 0) break; @@ -83,10 +98,35 @@ main (int argc, char *argv[]) saved = line[l]; line[l] = '\0'; - needed = strxfrm (NULL, line, 0); + + if (nocache) + { + line_len = strlen (line); + word = malloc (line_len + SMALL_STR_SIZE + 1); + if (word == NULL) + { + printf ("malloc failed: %m\n"); + exit (1); + } + memset (word, ' ', SMALL_STR_SIZE); + memcpy (word + SMALL_STR_SIZE, line, line_len); + word[line_len + SMALL_STR_SIZE] = '\0'; + } + else + word = line; + + needed = strxfrm (NULL, word, 0); newp = malloc (needed + 1); - strxfrm (newp, line, needed + 1); + if (newp == NULL) + { + printf ("malloc failed: %m\n"); + exit (1); + } + strxfrm (newp, word, needed + 1); strings[nstrings].xfrm = newp; + + if (nocache) + free (word); line[l] = saved; ++nstrings; } diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c index 9849447..921d1f7 100644 --- a/string/strxfrm_l.c +++ b/string/strxfrm_l.c @@ -40,9 +40,24 @@ #define CONCAT(a,b) CONCAT1(a,b) #define CONCAT1(a,b) a##b +/* Maximum string size that is calculated with cached indices. Right now this + is an arbitrary value open to optimizations. SMALL_STR_SIZE * 4 has to be + lower than __MAX_ALLOCA_CUTOFF. Keep localedata/xfrm-test.c in sync. */ +#define SMALL_STR_SIZE 4095 + #include "../locale/localeinfo.h" #include WEIGHT_H +/* Group locale data for shorter parameter lists. */ +typedef struct +{ + uint_fast32_t nrules; + unsigned char *rulesets; + USTRING_TYPE *weights; + int32_t *table; + USTRING_TYPE *extra; + int32_t *indirect; +} locale_data_t; #ifndef WIDE_CHAR_VERSION @@ -81,113 +96,325 @@ utf8_encode (char *buf, int val) } #endif +/* Find next weight and rule index. Inlined since called for every char. */ +static __always_inline size_t +find_idx (const USTRING_TYPE **us, int32_t *weight_idx, + unsigned char *rule_idx, const locale_data_t *l_data, const int pass) +{ + int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us, + -1); + *rule_idx = tmp >> 24; + int32_t idx = tmp & 0xffffff; + size_t len = l_data->weights[idx++]; + + /* Skip over indices of previous levels. */ + for (int i = 0; i < pass; i++) + { + idx += len; + len = l_data->weights[idx++]; + } -size_t -STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) + *weight_idx = idx; + return len; +} + +static int +find_position (const USTRING_TYPE *us, const locale_data_t *l_data, + const int pass) { - struct __locale_data *current = l->__locales[LC_COLLATE]; - uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; - /* We don't assign the following values right away since it might be - unnecessary in case there are no rules. */ - const unsigned char *rulesets; - const int32_t *table; - const USTRING_TYPE *weights; - const USTRING_TYPE *extra; - const int32_t *indirect; + int32_t weight_idx; + unsigned char rule_idx; + const USTRING_TYPE *usrc = us; + + find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass); + return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position; +} + +/* Do the transformation. */ +static size_t +do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n, + const locale_data_t *l_data) +{ + int32_t weight_idx; + unsigned char rule_idx; uint_fast32_t pass; - size_t needed; + size_t needed = 0; size_t last_needed; - const USTRING_TYPE *usrc; - size_t srclen = STRLEN (src); - int32_t *idxarr; - unsigned char *rulearr; - size_t idxmax; - size_t idxcnt; - int use_malloc; - if (nrules == 0) + /* Now the passes over the weights. */ + for (pass = 0; pass < l_data->nrules; ++pass) { - if (n != 0) - STPNCPY (dest, src, MIN (srclen + 1, n)); + size_t backw_len = 0; + last_needed = needed; + const USTRING_TYPE *cur = usrc; + const USTRING_TYPE *backw_start = NULL; - return srclen; - } + /* We assume that if a rule has defined `position' in one section + this is true for all of them. */ + int position = find_position (cur, l_data, pass); - rulesets = (const unsigned char *) - current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; - table = (const int32_t *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; - weights = (const USTRING_TYPE *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; - extra = (const USTRING_TYPE *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; - indirect = (const int32_t *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; - use_malloc = 0; + if (position == 0) + { + while (*cur != L('\0')) + { + const USTRING_TYPE *pos = cur; + size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data, + pass); + int rule = l_data->rulesets[rule_idx * l_data->nrules + pass]; - assert (((uintptr_t) table) % __alignof__ (table[0]) == 0); - assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0); - assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0); - assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0); + if ((rule & sort_forward) != 0) + { + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t i = backw_len; i > 0; ) + { + int32_t weight_idx; + unsigned char rule_idx; + size_t len = find_idx (&backw_start, &weight_idx, + &rule_idx, l_data, pass); + if (needed + i < n) + for (size_t j = len; j > 0; j--) + dest[needed + i - j] = + l_data->weights[weight_idx++]; + + i -= len; + } - /* Handle an empty string as a special case. */ - if (srclen == 0) - { - if (n != 0) - *dest = L('\0'); - return 0; - } + needed += backw_len; + backw_start = NULL; + backw_len = 0; + } - /* We need the elements of the string as unsigned values since they - are used as indeces. */ - usrc = (const USTRING_TYPE *) src; - - /* Perform the first pass over the string and while doing this find - and store the weights for each character. Since we want this to - be as fast as possible we are using `alloca' to store the temporary - values. But since there is no limit on the length of the string - we have to use `malloc' if the string is too long. We should be - very conservative here. */ - if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1))) - { - idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1)); - rulearr = (unsigned char *) &idxarr[srclen]; - - if (idxarr == NULL) - /* No memory. Well, go with the stack then. - - XXX Once this implementation is stable we will handle this - differently. Instead of precomputing the indeces we will - do this in time. This means, though, that this happens for - every pass again. */ - goto try_stack; - use_malloc = 1; - } - else - { - try_stack: - idxarr = (int32_t *) alloca (srclen * sizeof (int32_t)); - rulearr = (unsigned char *) alloca (srclen + 1); + /* Now handle the forward element. */ + if (needed + len < n) + while (len-- > 0) + dest[needed++] = l_data->weights[weight_idx++]; + else + /* No more characters fit into the buffer. */ + needed += len; + } + else + { + /* Remember start of the backward sequence & track length. */ + if (backw_start == NULL) + backw_start = pos; + backw_len += len; + } + } + + + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t i = backw_len; i > 0; ) + { + size_t len = find_idx (&backw_start, &weight_idx, &rule_idx, + l_data, pass); + if (needed + i < n) + for (size_t j = len; j > 0; j--) + dest[needed + i - j] = + l_data->weights[weight_idx++]; + + i -= len; + } + + needed += backw_len; + } + } + else + { + int val = 1; +#ifndef WIDE_CHAR_VERSION + char buf[7]; + size_t buflen; +#endif + size_t i; + + while (*cur != L('\0')) + { + const USTRING_TYPE *pos = cur; + size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data, + pass); + int rule = l_data->rulesets[rule_idx * l_data->nrules + pass]; + + if ((rule & sort_forward) != 0) + { + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t p = backw_len; p > 0; p--) + { + size_t len; + int32_t weight_idx; + unsigned char rule_idx; + const USTRING_TYPE *backw_cur = backw_start; + + /* To prevent a warning init the used vars. */ + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + for (i = 1; i < p; i++) + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + if (len != 0) + { +#ifdef WIDE_CHAR_VERSION + if (needed + 1 + len < n) + { + dest[needed] = val; + for (i = 0; i < len; ++i) + dest[needed + 1 + i] = + l_data->weights[weight_idx + i]; + } + needed += 1 + len; +#else + buflen = utf8_encode (buf, val); + if (needed + buflen + len < n) + { + for (i = 0; i < buflen; ++i) + dest[needed + i] = buf[i]; + for (i = 0; i < len; ++i) + dest[needed + buflen + i] = + l_data->weights[weight_idx + i]; + } + needed += buflen + len; +#endif + val = 1; + } + else + ++val; + } + + backw_start = NULL; + backw_len = 0; + } + + /* Now handle the forward element. */ + if (len != 0) + { +#ifdef WIDE_CHAR_VERSION + if (needed + 1 + len < n) + { + dest[needed] = val; + for (i = 0; i < len; ++i) + dest[needed + 1 + i] = + l_data->weights[weight_idx + i]; + } + needed += 1 + len; +#else + buflen = utf8_encode (buf, val); + if (needed + buflen + len < n) + { + for (i = 0; i < buflen; ++i) + dest[needed + i] = buf[i]; + for (i = 0; i < len; ++i) + dest[needed + buflen + i] = + l_data->weights[weight_idx + i]; + } + needed += buflen + len; +#endif + val = 1; + } + else + ++val; + } + else + { + /* Remember start of the backward sequence & track length. */ + if (backw_start == NULL) + backw_start = pos; + backw_len++; + } + } + + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t p = backw_len; p > 0; p--) + { + size_t len; + int32_t weight_idx; + unsigned char rule_idx; + const USTRING_TYPE *backw_cur = backw_start; + + /* To prevent a warning init the used vars. */ + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + for (i = 1; i < p; i++) + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + if (len != 0) + { +#ifdef WIDE_CHAR_VERSION + if (needed + 1 + len < n) + { + dest[needed] = val; + for (i = 0; i < len; ++i) + dest[needed + 1 + i] = + l_data->weights[weight_idx + i]; + } + needed += 1 + len; +#else + buflen = utf8_encode (buf, val); + if (needed + buflen + len < n) + { + for (i = 0; i < buflen; ++i) + dest[needed + i] = buf[i]; + for (i = 0; i < len; ++i) + dest[needed + buflen + i] = + l_data->weights[weight_idx + i]; + } + needed += buflen + len; +#endif + val = 1; + } + else + ++val; + } + } + } + + /* Finally store the byte to separate the passes or terminate + the string. */ + if (needed < n) + dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0'); + ++needed; } - idxmax = 0; - do + /* This is a little optimization: many collation specifications have + a `position' rule at the end and if no non-ignored character + is found the last \1 byte is immediately followed by a \0 byte + signalling this. We can avoid the \1 byte(s). */ + if (needed > 2 && needed == last_needed + 1) { - int32_t tmp = findidx (table, indirect, extra, &usrc, -1); - rulearr[idxmax] = tmp >> 24; - idxarr[idxmax] = tmp & 0xffffff; - - ++idxmax; + /* Remove the \1 byte. */ + if (--needed <= n) + dest[needed - 1] = L('\0'); } - while (*usrc != L('\0')); - /* This element is only read, the value never used but to determine - another value which then is ignored. */ - rulearr[idxmax] = '\0'; + /* Return the number of bytes/words we need, but don't count the NUL + byte/word at the end. */ + return needed - 1; +} + +/* Do the transformation using weight-index and rule cache. */ +static size_t +do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data, + size_t idxmax, int32_t *idxarr, const unsigned char *rulearr) +{ + uint_fast32_t nrules = l_data->nrules; + unsigned char *rulesets = l_data->rulesets; + USTRING_TYPE *weights = l_data->weights; + uint_fast32_t pass; + size_t needed = 0; + size_t last_needed; + size_t idxcnt; - /* Now the passes over the weights. We now use the indeces we found - before. */ - needed = 0; + /* Now the passes over the weights. */ for (pass = 0; pass < nrules; ++pass) { size_t backw_stop = ~0ul; @@ -433,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) dest[needed - 1] = L('\0'); } - /* Free the memory if needed. */ - if (use_malloc) - free (idxarr); - /* Return the number of bytes/words we need, but don't count the NUL byte/word at the end. */ return needed - 1; } + +size_t +STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) +{ + locale_data_t l_data; + struct __locale_data *current = l->__locales[LC_COLLATE]; + l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; + + /* Handle byte comparison case. */ + if (l_data.nrules == 0) + { + size_t srclen = STRLEN (src); + + if (n != 0) + STPNCPY (dest, src, MIN (srclen + 1, n)); + + return srclen; + } + + /* Handle an empty string, code hereafter relies on strlen (src) > 0. */ + if (*src == L('\0')) + { + if (n != 0) + *dest = L('\0'); + return 0; + } + + /* Get the locale data. */ + l_data.rulesets = (unsigned char *) + current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; + l_data.table = (int32_t *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; + l_data.weights = (USTRING_TYPE *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; + l_data.extra = (USTRING_TYPE *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; + l_data.indirect = (int32_t *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; + + assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0); + assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0); + assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0); + assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0); + + /* We need the elements of the string as unsigned values since they + are used as indeces. */ + const USTRING_TYPE *usrc = (const USTRING_TYPE *) src; + + /* Allocate cache for small strings on the stack and fill it with weight and + rule indices. If the cache size is not sufficient, continue with the + uncached xfrm version. */ + size_t idxmax = 0; + const USTRING_TYPE *cur = usrc; + int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t)); + unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1); + + do + { + int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur, + -1); + rulearr[idxmax] = tmp >> 24; + idxarr[idxmax] = tmp & 0xffffff; + + ++idxmax; + } + while (*cur != L('\0') && idxmax < SMALL_STR_SIZE); + + /* This element is only read, the value never used but to determine + another value which then is ignored. */ + rulearr[idxmax] = '\0'; + + /* Do the transformation. */ + if (*cur == L('\0')) + return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr); + else + return do_xfrm (usrc, dest, n, &l_data); +} libc_hidden_def (STRXFRM) #ifndef WIDE_CHAR_VERSION