Message ID | 20230213135558.3328727-3-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | Remove _STRING_ARCH_unaligned | expand |
Hi Adhemerval, + size_t len = __strchrnul (name, '=') - name; This is a change in behaviour, ignoring anything behind a '=' in name. What is wrong with strlen? + for (char **ep = __environ; *ep != NULL; ++ep) { - size_t len = strlen (name); -#if _STRING_ARCH_unaligned - name_start = *(const uint16_t *) name; -#else - name_start = (((const unsigned char *) name)[0] - | (((const unsigned char *) name)[1] << 8)); -#endif - len -= 2; - name += 2; - - for (ep = __environ; *ep != NULL; ++ep) - { -#if _STRING_ARCH_unaligned - uint16_t ep_start = *(uint16_t *) *ep; -#else - uint16_t ep_start = (((unsigned char *) *ep)[0] - | (((unsigned char *) *ep)[1] << 8)); -#endif - - if (name_start == ep_start && !strncmp (*ep + 2, name, len) - && (*ep)[len + 2] == '=') - return &(*ep)[len + 3]; - } + if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=') + return *ep + len + 1; It's still useful to keep a check for the first character, eg. name[0] == (*ep)[0] as this is about twice as fast as always calling strncmp. Cheers, Wilco
On 15/02/23 14:50, Wilco Dijkstra wrote: > Hi Adhemerval, > > + size_t len = __strchrnul (name, '=') - name; > > This is a change in behaviour, ignoring anything behind a '=' in name. What is > wrong with strlen? Indeed it is better to keep current semantic. > > + for (char **ep = __environ; *ep != NULL; ++ep) > { > - size_t len = strlen (name); > -#if _STRING_ARCH_unaligned > - name_start = *(const uint16_t *) name; > -#else > - name_start = (((const unsigned char *) name)[0] > - | (((const unsigned char *) name)[1] << 8)); > -#endif > - len -= 2; > - name += 2; > - > - for (ep = __environ; *ep != NULL; ++ep) > - { > -#if _STRING_ARCH_unaligned > - uint16_t ep_start = *(uint16_t *) *ep; > -#else > - uint16_t ep_start = (((unsigned char *) *ep)[0] > - | (((unsigned char *) *ep)[1] << 8)); > -#endif > - > - if (name_start == ep_start && !strncmp (*ep + 2, name, len) > - && (*ep)[len + 2] == '=') > - return &(*ep)[len + 3]; > - } > + if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=') > + return *ep + len + 1; > > It's still useful to keep a check for the first character, eg. name[0] == (*ep)[0] as this > is about twice as fast as always calling strncmp. Alright, updated patch below. From ddfb6419c5e74b34fb22454581fa0636275ec942 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella <adhemerval.zanella@linaro.org> Date: Thu, 9 Feb 2023 10:36:57 -0300 Subject: [PATCH 2/7] stdlib: Simplify getenv And remove _STRING_ARCH_unaligned usage. Checked on x86_64-linux-gnu and i686-linux-gnu. --- stdlib/getenv.c | 68 +++++++------------------------------------------ 1 file changed, 9 insertions(+), 59 deletions(-) diff --git a/stdlib/getenv.c b/stdlib/getenv.c index e3157ce2f3..61a42d941d 100644 --- a/stdlib/getenv.c +++ b/stdlib/getenv.c @@ -15,76 +15,26 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include <endian.h> -#include <errno.h> -#include <stdint.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <unistd.h> - -/* Return the value of the environment variable NAME. This implementation - is tuned a bit in that it assumes no environment variable has an empty - name which of course should always be true. We have a special case for - one character names so that for the general case we can assume at least - two characters which we can access. By doing this we can avoid using the - `strncmp' most of the time. */ char * getenv (const char *name) { - char **ep; - uint16_t name_start; - if (__environ == NULL || name[0] == '\0') return NULL; - if (name[1] == '\0') - { - /* The name of the variable consists of only one character. Therefore - the first two characters of the environment entry are this character - and a '=' character. */ -#if __BYTE_ORDER == __LITTLE_ENDIAN || !_STRING_ARCH_unaligned - name_start = ('=' << 8) | *(const unsigned char *) name; -#else - name_start = '=' | ((*(const unsigned char *) name) << 8); -#endif - for (ep = __environ; *ep != NULL; ++ep) - { -#if _STRING_ARCH_unaligned - uint16_t ep_start = *(uint16_t *) *ep; -#else - uint16_t ep_start = (((unsigned char *) *ep)[0] - | (((unsigned char *) *ep)[1] << 8)); -#endif - if (name_start == ep_start) - return &(*ep)[2]; - } - } - else - { - size_t len = strlen (name); -#if _STRING_ARCH_unaligned - name_start = *(const uint16_t *) name; -#else - name_start = (((const unsigned char *) name)[0] - | (((const unsigned char *) name)[1] << 8)); -#endif - len -= 2; - name += 2; - - for (ep = __environ; *ep != NULL; ++ep) - { -#if _STRING_ARCH_unaligned - uint16_t ep_start = *(uint16_t *) *ep; -#else - uint16_t ep_start = (((unsigned char *) *ep)[0] - | (((unsigned char *) *ep)[1] << 8)); -#endif + bool single_char = name[1] == '\0'; - if (name_start == ep_start && !strncmp (*ep + 2, name, len) - && (*ep)[len + 2] == '=') - return &(*ep)[len + 3]; - } + size_t len = strlen (name);; + for (char **ep = __environ; *ep != NULL; ++ep) + { + if (single_char && (*ep)[0] == name[0] && (*ep)[1] == '=') + return *ep + 2; + else if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=') + return *ep + len + 1; } return NULL;
On Feb 16 2023, Adhemerval Zanella Netto via Libc-alpha wrote:
> + size_t len = strlen (name);;
Extra semicolon.
Hi Adhemerval, + size_t len = strlen (name);; + for (char **ep = __environ; *ep != NULL; ++ep) + { + if (single_char && (*ep)[0] == name[0] && (*ep)[1] == '=') + return *ep + 2; + else if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=') + return *ep + len + 1; This is about 10-20% slower than the previous patch both for single-char and multi-char case... The approach I showed it is far simpler and 3-4 times faster (within 10% of original performance). Cheers, Wilco
On 16/02/23 15:02, Wilco Dijkstra wrote: > Hi Adhemerval, > > + size_t len = strlen (name);; > + for (char **ep = __environ; *ep != NULL; ++ep) > + { > + if (single_char && (*ep)[0] == name[0] && (*ep)[1] == '=') > + return *ep + 2; > + else if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=') > + return *ep + len + 1; > > This is about 10-20% slower than the previous patch both for single-char and > multi-char case... The approach I showed it is far simpler and 3-4 times faster > (within 10% of original performance). Do you mean something like: size_t len = strlen (name);; for (char **ep = __environ; *ep != NULL; ++ep) { if (((*ep)[0] == name[0] && (*ep)[1] == '=') || (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')) return *ep + len + 1; }
Hi Adhemerval, > Do you mean something like: > > size_t len = strlen (name);; > for (char **ep = __environ; *ep != NULL; ++ep) > { > if (((*ep)[0] == name[0] && (*ep)[1] == '=') > || (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')) > return *ep + len + 1; > } No I meant checking the first character first before doing more expensive calls, so: if (name[0] == (*ep)[0] && strncmp (name, *ep, len) == 0 && (*ep)[len] == '=') return *ep + len + 1; Basically this means you get a very tight loop checking the first character - this avoids most calls to strncmp. This is basically the same result as the current implementation but without all the defines and complex unaligned/big-endian stuff! Cheers, Wilco
On 16/02/23 15:47, Wilco Dijkstra wrote: > Hi Adhemerval, > >> Do you mean something like: >> >> size_t len = strlen (name);; >> for (char **ep = __environ; *ep != NULL; ++ep) >> { >> if (((*ep)[0] == name[0] && (*ep)[1] == '=') >> || (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')) >> return *ep + len + 1; >> } > > No I meant checking the first character first before doing more expensive calls, so: > > if (name[0] == (*ep)[0] && strncmp (name, *ep, len) == 0 && (*ep)[len] == '=') > return *ep + len + 1; > > Basically this means you get a very tight loop checking the first character - this avoids > most calls to strncmp. This is basically the same result as the current implementation > but without all the defines and complex unaligned/big-endian stuff! Ah ok, I see it now. Do the follow addresses the issues pointed out? From 373682e6c290f1c38d0319b5695e6a12a388be0c Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella <adhemerval.zanella@linaro.org> Date: Thu, 9 Feb 2023 10:36:57 -0300 Subject: [PATCH 2/7] stdlib: Simplify getenv And remove _STRING_ARCH_unaligned usage. Checked on x86_64-linux-gnu and i686-linux-gnu. --- stdlib/getenv.c | 64 ++++--------------------------------------------- 1 file changed, 5 insertions(+), 59 deletions(-) diff --git a/stdlib/getenv.c b/stdlib/getenv.c index e3157ce2f3..6942d23433 100644 --- a/stdlib/getenv.c +++ b/stdlib/getenv.c @@ -15,76 +15,22 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include <endian.h> -#include <errno.h> -#include <stdint.h> #include <stdlib.h> #include <string.h> #include <unistd.h> - -/* Return the value of the environment variable NAME. This implementation - is tuned a bit in that it assumes no environment variable has an empty - name which of course should always be true. We have a special case for - one character names so that for the general case we can assume at least - two characters which we can access. By doing this we can avoid using the - `strncmp' most of the time. */ char * getenv (const char *name) { - char **ep; - uint16_t name_start; - if (__environ == NULL || name[0] == '\0') return NULL; - if (name[1] == '\0') - { - /* The name of the variable consists of only one character. Therefore - the first two characters of the environment entry are this character - and a '=' character. */ -#if __BYTE_ORDER == __LITTLE_ENDIAN || !_STRING_ARCH_unaligned - name_start = ('=' << 8) | *(const unsigned char *) name; -#else - name_start = '=' | ((*(const unsigned char *) name) << 8); -#endif - for (ep = __environ; *ep != NULL; ++ep) - { -#if _STRING_ARCH_unaligned - uint16_t ep_start = *(uint16_t *) *ep; -#else - uint16_t ep_start = (((unsigned char *) *ep)[0] - | (((unsigned char *) *ep)[1] << 8)); -#endif - if (name_start == ep_start) - return &(*ep)[2]; - } - } - else + size_t len = strlen (name);; + for (char **ep = __environ; *ep != NULL; ++ep) { - size_t len = strlen (name); -#if _STRING_ARCH_unaligned - name_start = *(const uint16_t *) name; -#else - name_start = (((const unsigned char *) name)[0] - | (((const unsigned char *) name)[1] << 8)); -#endif - len -= 2; - name += 2; - - for (ep = __environ; *ep != NULL; ++ep) - { -#if _STRING_ARCH_unaligned - uint16_t ep_start = *(uint16_t *) *ep; -#else - uint16_t ep_start = (((unsigned char *) *ep)[0] - | (((unsigned char *) *ep)[1] << 8)); -#endif - - if (name_start == ep_start && !strncmp (*ep + 2, name, len) - && (*ep)[len + 2] == '=') - return &(*ep)[len + 3]; - } + if (name[0] == (*ep)[0] + && strncmp (name, *ep, len) == 0 && (*ep)[len] == '=') + return *ep + len + 1; } return NULL;
Hi Adhemerval, > Ah ok, I see it now. Do the follow addresses the issues pointed out? Yes that looks good to me - just one thing (as pointed out by Andreas): + size_t len = strlen (name);; Double semicolon. OK with that fixed. Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com> Cheers, Wilco
diff --git a/stdlib/getenv.c b/stdlib/getenv.c index e3157ce2f3..449bba1e16 100644 --- a/stdlib/getenv.c +++ b/stdlib/getenv.c @@ -15,76 +15,21 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include <endian.h> -#include <errno.h> -#include <stdint.h> #include <stdlib.h> #include <string.h> #include <unistd.h> - -/* Return the value of the environment variable NAME. This implementation - is tuned a bit in that it assumes no environment variable has an empty - name which of course should always be true. We have a special case for - one character names so that for the general case we can assume at least - two characters which we can access. By doing this we can avoid using the - `strncmp' most of the time. */ char * getenv (const char *name) { - char **ep; - uint16_t name_start; - if (__environ == NULL || name[0] == '\0') return NULL; - if (name[1] == '\0') - { - /* The name of the variable consists of only one character. Therefore - the first two characters of the environment entry are this character - and a '=' character. */ -#if __BYTE_ORDER == __LITTLE_ENDIAN || !_STRING_ARCH_unaligned - name_start = ('=' << 8) | *(const unsigned char *) name; -#else - name_start = '=' | ((*(const unsigned char *) name) << 8); -#endif - for (ep = __environ; *ep != NULL; ++ep) - { -#if _STRING_ARCH_unaligned - uint16_t ep_start = *(uint16_t *) *ep; -#else - uint16_t ep_start = (((unsigned char *) *ep)[0] - | (((unsigned char *) *ep)[1] << 8)); -#endif - if (name_start == ep_start) - return &(*ep)[2]; - } - } - else + size_t len = __strchrnul (name, '=') - name; + for (char **ep = __environ; *ep != NULL; ++ep) { - size_t len = strlen (name); -#if _STRING_ARCH_unaligned - name_start = *(const uint16_t *) name; -#else - name_start = (((const unsigned char *) name)[0] - | (((const unsigned char *) name)[1] << 8)); -#endif - len -= 2; - name += 2; - - for (ep = __environ; *ep != NULL; ++ep) - { -#if _STRING_ARCH_unaligned - uint16_t ep_start = *(uint16_t *) *ep; -#else - uint16_t ep_start = (((unsigned char *) *ep)[0] - | (((unsigned char *) *ep)[1] << 8)); -#endif - - if (name_start == ep_start && !strncmp (*ep + 2, name, len) - && (*ep)[len + 2] == '=') - return &(*ep)[len + 3]; - } + if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=') + return *ep + len + 1; } return NULL;