Message ID | AM5PR0802MB2610F3F2F9864ECF0089F9E583AD0@AM5PR0802MB2610.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
On 28/10/2016 09:35, Wilco Dijkstra wrote: > Improve strtok(_r) performance. Instead of calling strpbrk which calls > strcspn, call strcspn directly so we get the end of the token without > an extra call to rawmemchr. Also avoid an unnecessary call to strcspn after > the last token by adding an early exit for an empty string. The result > is a ~2x speedup of strtok on most inputs in bench-strtok. > > Passes regression tests, OK for commit? Why not aim for simplicity and just use strtok_r in strtok? It should be a tail call on most architectures and the performance loss should be minimal. Either way LGTM. I also found that the powerpc64-optimized one performs worse than this new default one; once you push it in, I plan to remove it. > > ChangeLog: > 2015-10-28 Wilco Dijkstra <wdijkstr@arm.com> > > * string/strtok.c (STRTOK): Optimize for performance. > * string/strtok_r.c (__strtok_r): Likewise. > -- > > diff --git a/string/strtok.c b/string/strtok.c > index 7a4574db5c80501e47d045ad4347e8a287b32191..b1ed48c24c8d20706b7d05481a138b18a01ff802 100644 > --- a/string/strtok.c > +++ b/string/strtok.c > @@ -38,11 +38,18 @@ static char *olds; > char * > STRTOK (char *s, const char *delim) > { > - char *token; > + char *end; > > if (s == NULL) > s = olds; > > + /* Return immediately at end of string. */ > + if (*s == '\0') > + { > + olds = s; > + return NULL; > + } > + > /* Scan leading delimiters. */ > s += strspn (s, delim); > if (*s == '\0') > @@ -52,16 +59,15 @@ STRTOK (char *s, const char *delim) > } > > /* Find the end of the token. */ > - token = s; > - s = strpbrk (token, delim); > - if (s == NULL) > - /* This token finishes the string. */ > - olds = __rawmemchr (token, '\0'); > - else > + end = s + strcspn (s, delim); > + if (*end == '\0') > { > - /* Terminate the token and make OLDS point past it. */ > - *s = '\0'; > - olds = s + 1; > + olds = end; > + return s; > } > - return token; > + > + /* Terminate the token and make OLDS point past it. 
*/ > + *end = '\0'; > + olds = end + 1; > + return s; > } > diff --git a/string/strtok_r.c b/string/strtok_r.c > index f351304766108dad2c1cff881ad3bebae821b2a0..e049a5c82e026a3b6c1ba5da16ce81743717805e 100644 > --- a/string/strtok_r.c > +++ b/string/strtok_r.c > @@ -45,11 +45,17 @@ > char * > __strtok_r (char *s, const char *delim, char **save_ptr) > { > - char *token; > + char *end; > > if (s == NULL) > s = *save_ptr; > > + if (*s == '\0') > + { > + *save_ptr = s; > + return NULL; > + } > + > /* Scan leading delimiters. */ > s += strspn (s, delim); > if (*s == '\0') > @@ -59,18 +65,17 @@ __strtok_r (char *s, const char *delim, char **save_ptr) > } > > /* Find the end of the token. */ > - token = s; > - s = strpbrk (token, delim); > - if (s == NULL) > - /* This token finishes the string. */ > - *save_ptr = __rawmemchr (token, '\0'); > - else > + end = s + strcspn (s, delim); > + if (*end == '\0') > { > - /* Terminate the token and make *SAVE_PTR point past it. */ > - *s = '\0'; > - *save_ptr = s + 1; > + *save_ptr = end; > + return s; > } > - return token; > + > + /* Terminate the token and make *SAVE_PTR point past it. */ > + *end = '\0'; > + *save_ptr = end + 1; > + return s; > } > #ifdef weak_alias > libc_hidden_def (__strtok_r) >
diff --git a/string/strtok.c b/string/strtok.c index 7a4574db5c80501e47d045ad4347e8a287b32191..b1ed48c24c8d20706b7d05481a138b18a01ff802 100644 --- a/string/strtok.c +++ b/string/strtok.c @@ -38,11 +38,18 @@ static char *olds; char * STRTOK (char *s, const char *delim) { - char *token; + char *end; if (s == NULL) s = olds; + /* Return immediately at end of string. */ + if (*s == '\0') + { + olds = s; + return NULL; + } + /* Scan leading delimiters. */ s += strspn (s, delim); if (*s == '\0') @@ -52,16 +59,15 @@ STRTOK (char *s, const char *delim) } /* Find the end of the token. */ - token = s; - s = strpbrk (token, delim); - if (s == NULL) - /* This token finishes the string. */ - olds = __rawmemchr (token, '\0'); - else + end = s + strcspn (s, delim); + if (*end == '\0') { - /* Terminate the token and make OLDS point past it. */ - *s = '\0'; - olds = s + 1; + olds = end; + return s; } - return token; + + /* Terminate the token and make OLDS point past it. */ + *end = '\0'; + olds = end + 1; + return s; } diff --git a/string/strtok_r.c b/string/strtok_r.c index f351304766108dad2c1cff881ad3bebae821b2a0..e049a5c82e026a3b6c1ba5da16ce81743717805e 100644 --- a/string/strtok_r.c +++ b/string/strtok_r.c @@ -45,11 +45,17 @@ char * __strtok_r (char *s, const char *delim, char **save_ptr) { - char *token; + char *end; if (s == NULL) s = *save_ptr; + if (*s == '\0') + { + *save_ptr = s; + return NULL; + } + /* Scan leading delimiters. */ s += strspn (s, delim); if (*s == '\0') @@ -59,18 +65,17 @@ __strtok_r (char *s, const char *delim, char **save_ptr) } /* Find the end of the token. */ - token = s; - s = strpbrk (token, delim); - if (s == NULL) - /* This token finishes the string. */ - *save_ptr = __rawmemchr (token, '\0'); - else + end = s + strcspn (s, delim); + if (*end == '\0') { - /* Terminate the token and make *SAVE_PTR point past it. 
*/ - *s = '\0'; - *save_ptr = s + 1; + *save_ptr = end; + return s; } - return token; + + /* Terminate the token and make *SAVE_PTR point past it. */ + *end = '\0'; + *save_ptr = end + 1; + return s; } #ifdef weak_alias libc_hidden_def (__strtok_r)