[v2,BZ,15884] strcoll: improve performance by removing the cache

Message ID	20141017102034.GO1552@spoyarek.pnq.redhat.com
State	New
Headers	show Return-Path: <libc-alpha-return-53590-incoming=patchwork.ozlabs.org@sourceware.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:date:from:to:cc:subject:message-id:references :mime-version:content-type:in-reply-to; q=dns; s=default; b=HV92 r17KCbrSfkP30wDWPsSmR79lhzBDgPJ7plYS1BJakR4X5a/SNIwUSXlNTD2tI1tr P1PkJuEoj9CVFNbKztUebdTNhbASvswmo9IzL4imRpPVep49fMmQSvcr5DEztUFe rbu9g8lS0yabGwvryblC6dAP+F8PtraKBCthozI= Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk Sender: libc-alpha-owner@sourceware.org Date: Fri, 17 Oct 2014 15:50:34 +0530 From: Siddhesh Poyarekar <siddhesh@redhat.com> To: Leonhard Holz <leonhard.holz@web.de> Cc: libc-alpha@sourceware.org Subject: Re: [Patch v2] [BZ 15884] strcoll: improve performance by removing the cache Message-ID: <20141017102034.GO1552@spoyarek.pnq.redhat.com> References: <543D20F8.9000308@web.de> <20141017100205.GN1552@spoyarek.pnq.redhat.com> MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="Ucgz5Oc/kKURWzXs" Content-Disposition: inline In-Reply-To: <20141017100205.GN1552@spoyarek.pnq.redhat.com> User-Agent: Mutt/1.5.22.1-rc1 (2013-10-16)

diff --git a/ChangeLog b/ChangeLog index 6c6d9d8..1aa1e6e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2014-10-17 Leonhard Holz <leonhard.holz@web.de> + + [BZ #15884] + * string/strcoll_l.c: Don't include stdio.h. + (coll_seq): Remove members idxarr and rulearr. + (get_next_seq_cached): Remove function. + (get_next_seq): Likewise. + (get_next_seq_nocache): Rename to get_next_seq. + (do_compare): Remove function. + (do_compare_nocache): Rename to do_compare. + (STRCOLL): Remove weight and rules cache. + 2014-10-16 Roland McGrath <roland@hack.frob.com> * sysdeps/arm/soft-fp/sfp-machine.h: Filed moved ... diff --git a/NEWS b/NEWS index 987f306..f3e222d 100644 --- a/NEWS +++ b/NEWS @@ -9,7 +9,7 @@ Version 2.21 * The following bugs are resolved with this release: - 6652, 12926, 14171, 17266, 17363, 17370, 17371, 17411, 17460. + 6652, 12926, 14171, 15884, 17266, 17363, 17370, 17371, 17411, 17460. Version 2.20 diff --git a/string/strcoll_l.c b/string/strcoll_l.c index d4f42a3..6677eaf 100644 --- a/string/strcoll_l.c +++ b/string/strcoll_l.c @@ -22,7 +22,6 @@ #include <locale.h> #include <stddef.h> #include <stdint.h> -#include <stdlib.h> #include <string.h> #include <sys/param.h> @@ -55,8 +54,6 @@ typedef struct size_t backw; /* Current Backward sequence index. */ size_t backw_stop; /* Index where the backward sequences stop. */ const USTRING_TYPE *us; /* The string. */ - int32_t *idxarr; /* Array to cache weight indices. */ - unsigned char *rulearr; /* Array to cache rules. */ unsigned char rule; /* Saved rule for the first sequence. */ int32_t idx; /* Index to weight of the current sequence. */ int32_t save_idx; /* Save looked up index of a forward @@ -65,179 +62,9 @@ typedef struct const USTRING_TYPE *back_us; /* Beginning of the backward sequence. */ } coll_seq; -/* Get next sequence. The weight indices are cached, so we don't need to - traverse the string. */ -static void -get_next_seq_cached (coll_seq *seq, int nrules, int pass, - const unsigned char *rulesets, - const USTRING_TYPE *weights) -{ - size_t val = seq->val = 0; - int len = seq->len; - size_t backw_stop = seq->backw_stop; - size_t backw = seq->backw; - size_t idxcnt = seq->idxcnt; - size_t idxmax = seq->idxmax; - size_t idxnow = seq->idxnow; - unsigned char *rulearr = seq->rulearr; - int32_t *idxarr = seq->idxarr; - - while (len == 0) - { - ++val; - if (backw_stop != ~0ul) - { - /* There is something pushed. */ - if (backw == backw_stop) - { - /* The last pushed character was handled. Continue - with forward characters. */ - if (idxcnt < idxmax) - { - idxnow = idxcnt; - backw_stop = ~0ul; - } - else - { - /* Nothing any more. The backward sequence - ended with the last sequence in the string. */ - idxnow = ~0ul; - break; - } - } - else - idxnow = --backw; - } - else - { - backw_stop = idxcnt; - - while (idxcnt < idxmax) - { - if ((rulesets[rulearr[idxcnt] * nrules + pass] - & sort_backward) == 0) - /* No more backward characters to push. */ - break; - ++idxcnt; - } - - if (backw_stop == idxcnt) - { - /* No sequence at all or just one. */ - if (idxcnt == idxmax) - /* Note that LEN is still zero. */ - break; - - backw_stop = ~0ul; - idxnow = idxcnt++; - } - else - /* We pushed backward sequences. */ - idxnow = backw = idxcnt - 1; - } - len = weights[idxarr[idxnow]++]; - } - - /* Update the structure. */ - seq->val = val; - seq->len = len; - seq->backw_stop = backw_stop; - seq->backw = backw; - seq->idxcnt = idxcnt; - seq->idxnow = idxnow; -} - /* Get next sequence. Traverse the string as required. */ static void get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets, - const USTRING_TYPE *weights, const int32_t *table, - const USTRING_TYPE *extra, const int32_t *indirect) -{ - size_t val = seq->val = 0; - int len = seq->len; - size_t backw_stop = seq->backw_stop; - size_t backw = seq->backw; - size_t idxcnt = seq->idxcnt; - size_t idxmax = seq->idxmax; - size_t idxnow = seq->idxnow; - unsigned char *rulearr = seq->rulearr; - int32_t *idxarr = seq->idxarr; - const USTRING_TYPE *us = seq->us; - - while (len == 0) - { - ++val; - if (backw_stop != ~0ul) - { - /* There is something pushed. */ - if (backw == backw_stop) - { - /* The last pushed character was handled. Continue - with forward characters. */ - if (idxcnt < idxmax) - { - idxnow = idxcnt; - backw_stop = ~0ul; - } - else - /* Nothing any more. The backward sequence ended with - the last sequence in the string. Note that LEN - is still zero. */ - break; - } - else - idxnow = --backw; - } - else - { - backw_stop = idxmax; - - while (*us != L('\0')) - { - int32_t tmp = findidx (table, indirect, extra, &us, -1); - rulearr[idxmax] = tmp >> 24; - idxarr[idxmax] = tmp & 0xffffff; - idxcnt = idxmax++; - - if ((rulesets[rulearr[idxcnt] * nrules] - & sort_backward) == 0) - /* No more backward characters to push. */ - break; - ++idxcnt; - } - - if (backw_stop >= idxcnt) - { - /* No sequence at all or just one. */ - if (idxcnt == idxmax || backw_stop > idxcnt) - /* Note that LEN is still zero. */ - break; - - backw_stop = ~0ul; - idxnow = idxcnt; - } - else - /* We pushed backward sequences. */ - idxnow = backw = idxcnt - 1; - } - len = weights[idxarr[idxnow]++]; - } - - /* Update the structure. */ - seq->val = val; - seq->len = len; - seq->backw_stop = backw_stop; - seq->backw = backw; - seq->idxcnt = idxcnt; - seq->idxmax = idxmax; - seq->idxnow = idxnow; - seq->us = us; -} - -/* Get next sequence. Traverse the string as required. This function does not - set or use any index or rule cache. */ -static void -get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, const USTRING_TYPE *weights, const int32_t *table, const USTRING_TYPE *extra, const int32_t *indirect, int pass) @@ -366,10 +193,9 @@ get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, seq->idx = idx; } -/* Compare two sequences. This version does not use the index and rules - cache. */ +/* Compare two sequences. */ static int -do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position, +do_compare (coll_seq *seq1, coll_seq *seq2, int position, const USTRING_TYPE *weights) { int seq1len = seq1->len; @@ -417,56 +243,6 @@ out: return result; } -/* Compare two sequences using the index cache. */ -static int -do_compare (coll_seq *seq1, coll_seq *seq2, int position, - const USTRING_TYPE *weights) -{ - int seq1len = seq1->len; - int seq2len = seq2->len; - size_t val1 = seq1->val; - size_t val2 = seq2->val; - int32_t *idx1arr = seq1->idxarr; - int32_t *idx2arr = seq2->idxarr; - int idx1now = seq1->idxnow; - int idx2now = seq2->idxnow; - int result = 0; - - /* Test for position if necessary. */ - if (position && val1 != val2) - { - result = val1 > val2 ? 1 : -1; - goto out; - } - - /* Compare the two sequences. */ - do - { - if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]]) - { - /* The sequences differ. */ - result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]]; - goto out; - } - - /* Increment the offsets. */ - ++idx1arr[idx1now]; - ++idx2arr[idx2now]; - - --seq1len; - --seq2len; - } - while (seq1len > 0 && seq2len > 0); - - if (position && seq1len != seq2len) - result = seq1len - seq2len; - -out: - seq1->len = seq1len; - seq2->len = seq2len; - return result; -} - int STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) { @@ -483,6 +259,10 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) if (nrules == 0) return STRCMP (s1, s2); + /* Catch empty strings. */ + if (__glibc_unlikely (*s1 == '\0') || __glibc_unlikely (*s2 == '\0')) + return (*s1 != '\0') - (*s2 != '\0'); + rulesets = (const unsigned char *) current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; table = (const int32_t *) @@ -499,65 +279,12 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0); assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0); - /* We need this a few times. */ - size_t s1len = STRLEN (s1); - size_t s2len = STRLEN (s2); - - /* Catch empty strings. */ - if (__glibc_unlikely (s1len == 0) || __glibc_unlikely (s2len == 0)) - return (s1len != 0) - (s2len != 0); - - /* Perform the first pass over the string and while doing this find - and store the weights for each character. Since we want this to - be as fast as possible we are using `alloca' to store the temporary - values. But since there is no limit on the length of the string - we have to use `malloc' if the string is too long. We should be - very conservative here. - - Please note that the localedef programs makes sure that `position' - is not used at the first level. */ + int result = 0, rule = 0; coll_seq seq1, seq2; - bool use_malloc = false; - int result = 0; - memset (&seq1, 0, sizeof (seq1)); seq2 = seq1; - size_t size_max = SIZE_MAX / (sizeof (int32_t) + 1); - - if (MIN (s1len, s2len) > size_max - || MAX (s1len, s2len) > size_max - MIN (s1len, s2len)) - { - /* If the strings are long enough to cause overflow in the size request, - then skip the allocation and proceed with the non-cached routines. */ - } - else if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1))) - { - seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1)); - - /* If we failed to allocate memory, we leave everything as NULL so that - we use the nocache version of traversal and comparison functions. */ - if (seq1.idxarr != NULL) - { - seq2.idxarr = &seq1.idxarr[s1len]; - seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len]; - seq2.rulearr = &seq1.rulearr[s1len]; - use_malloc = true; - } - } - else - { - seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t)); - seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t)); - seq1.rulearr = (unsigned char *) alloca (s1len); - seq2.rulearr = (unsigned char *) alloca (s2len); - } - - int rule = 0; - - /* Cache values in the first pass and if needed, use them in subsequent - passes. */ for (int pass = 0; pass < nrules; ++pass) { seq1.idxcnt = 0; @@ -575,64 +302,45 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) seq2.us = (const USTRING_TYPE *) s2; /* We assume that if a rule has defined `position' in one section - this is true for all of them. */ + this is true for all of them. Please note that the localedef programs + makes sure that `position' is not used at the first level. */ + int position = rulesets[rule * nrules + pass] & sort_position; while (1) { - if (__glibc_unlikely (seq1.idxarr == NULL)) - { - get_next_seq_nocache (&seq1, nrules, rulesets, weights, table, + get_next_seq (&seq1, nrules, rulesets, weights, table, extra, indirect, pass); - get_next_seq_nocache (&seq2, nrules, rulesets, weights, table, + get_next_seq (&seq2, nrules, rulesets, weights, table, extra, indirect, pass); - } - else if (pass == 0) - { - get_next_seq (&seq1, nrules, rulesets, weights, table, extra, - indirect); - get_next_seq (&seq2, nrules, rulesets, weights, table, extra, - indirect); - } - else - { - get_next_seq_cached (&seq1, nrules, pass, rulesets, weights); - get_next_seq_cached (&seq2, nrules, pass, rulesets, weights); - } - /* See whether any or both strings are empty. */ if (seq1.len == 0 || seq2.len == 0) { if (seq1.len == seq2.len) - /* Both ended. So far so good, both strings are equal - at this level. */ - break; + { + /* Both strings ended and are equal at this level. Do a + byte-level comparison to ensure that we don't waste time + going through multiple passes for totally equal strings + before proceeding to subsequent passes. */ + if (pass == 0 && STRCMP (s1, s2) == 0) + return result; + else + break; + } /* This means one string is shorter than the other. Find out which one and return an appropriate value. */ - result = seq1.len == 0 ? -1 : 1; - goto free_and_return; + return seq1.len == 0 ? -1 : 1; } - if (__glibc_unlikely (seq1.idxarr == NULL)) - result = do_compare_nocache (&seq1, &seq2, position, weights); - else - result = do_compare (&seq1, &seq2, position, weights); + result = do_compare (&seq1, &seq2, position, weights); if (result != 0) - goto free_and_return; + return result; } - if (__glibc_likely (seq1.rulearr != NULL)) - rule = seq1.rulearr[0]; - else - rule = seq1.rule; + rule = seq1.rule; } - /* Free the memory if needed. */ - free_and_return: - if (use_malloc) - free (seq1.idxarr); - return result; } libc_hidden_def (STRCOLL)

[v2,BZ,15884] strcoll: improve performance by removing the cache

Commit Message

Patch