Message ID | 20230510195946.3728273-1-josimmon@redhat.com |
---|---|
State | New |
Headers | show |
Series | time: strftime_l: Use malloc rather than an unbounded alloca. | expand |
On 10/05/23 16:59, Joe Simmons-Talbott via Libc-alpha wrote: > Avoid possible stack overflow by replacing alloca() with malloc(). > --- > time/strftime_l.c | 8 ++++++-- > 1 file changed, 6 insertions(+), 2 deletions(-) > > diff --git a/time/strftime_l.c b/time/strftime_l.c > index 402c6c4111..59d3e1a3b2 100644 > --- a/time/strftime_l.c > +++ b/time/strftime_l.c > @@ -273,8 +273,9 @@ static const CHAR_T zeroes[16] = /* "0000000000000000" */ > const char *__s = os; \ > memset (&__st, '\0', sizeof (__st)); \ > l = __mbsrtowcs_l (NULL, &__s, 0, &__st, loc); \ > - ws = alloca ((l + 1) * sizeof (wchar_t)); \ > - (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc); \ > + ws = malloc ((l + 1) * sizeof (wchar_t)); \ > + if (ws != NULL) \ > + (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc); \ > } > #endif > > @@ -1346,7 +1347,10 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format, > wchar_t *wczone; > size_t len; > widen (zone, wczone, len); > + if (wczone == NULL) > + return 0; > cpy (len, wczone); > + free (wczone); > } > #else > cpy (strlen (zone), zone); Do we have a practical maximum size for the abbreviate timezone name? The internal tz_rule 'name' field is just a pointer, but I think all timezones uses a maximum name size.
On Tue, May 16, 2023 at 04:28:58PM -0300, Adhemerval Zanella Netto wrote: > > > On 10/05/23 16:59, Joe Simmons-Talbott via Libc-alpha wrote: > > Avoid possible stack overflow by replacing alloca() with malloc(). > > --- > > time/strftime_l.c | 8 ++++++-- > > 1 file changed, 6 insertions(+), 2 deletions(-) > > > > diff --git a/time/strftime_l.c b/time/strftime_l.c > > index 402c6c4111..59d3e1a3b2 100644 > > --- a/time/strftime_l.c > > +++ b/time/strftime_l.c > > @@ -273,8 +273,9 @@ static const CHAR_T zeroes[16] = /* "0000000000000000" */ > > const char *__s = os; \ > > memset (&__st, '\0', sizeof (__st)); \ > > l = __mbsrtowcs_l (NULL, &__s, 0, &__st, loc); \ > > - ws = alloca ((l + 1) * sizeof (wchar_t)); \ > > - (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc); \ > > + ws = malloc ((l + 1) * sizeof (wchar_t)); \ > > + if (ws != NULL) \ > > + (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc); \ > > } > > #endif > > > > @@ -1346,7 +1347,10 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format, > > wchar_t *wczone; > > size_t len; > > widen (zone, wczone, len); > > + if (wczone == NULL) > > + return 0; > > cpy (len, wczone); > > + free (wczone); > > } > > #else > > cpy (strlen (zone), zone); > > Do we have a practical maximum size for the abbreviate timezone name? The > internal tz_rule 'name' field is just a pointer, but I think all timezones > uses a maximum name size. > I was able to pass a random string in via the TZ environment variable. I'm not sure if that matters since the size of the buffer (s) and maxsize would still limit the amount of bytes from the TZ variable.
On 5/16/23 12:28, Adhemerval Zanella Netto via Libc-alpha wrote: > Do we have a practical maximum size for the abbreviate timezone name? The > internal tz_rule 'name' field is just a pointer, but I think all timezones > uses a maximum name size. In current TZDB data the longest abbreviation has four bytes (e.g., "AKST", "NZST"). Before TZDB release 2016g (2016-09-13) the longest was 48 bytes, for the Factory zone's "Local time zone must be set--see zic manual page" abbreviation. Although the TZif file format's limit is 2**32 - 1 bytes, the 'zic' command won't process TZif files with abbreviations longer than about 2000 bytes, due to its internal input line buffer. There is a limit even if you use long strings in TZ environment variables, as glibc strftime is buggy when a buffer contains more than INT_MAX bytes (it uses 'int' to store byte counts). This means you can't effectively use a time zone abbreviation longer than about 2**31 - 1 bytes with glibc, even on 64-bit platforms. For example, a program like the one shown below won't work with glibc. For what it's worth, _POSIX_TZNAME_MAX is 6, that is, every POSIX platform must support at least 6 bytes. #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> int main () { size_t Xs = (1LL << 32) + 1; char *tzbuf = malloc (sizeof "TZ=" - 1 + Xs + sizeof "0"); if (!tzbuf) return perror ("malloc tzbuf"), 1; size_t strftimebufsize = Xs + 2; char *strftimebuf = malloc (strftimebufsize); if (!strftimebuf) return perror ("malloc strftimebuf"), 1; strcpy (tzbuf, "TZ="); memset (tzbuf + 3, 'X', Xs); strcpy (tzbuf + 3 + Xs, "0"); if (putenv (tzbuf)) return perror ("putenv"), 1; time_t t0 = 0; struct tm *tm = localtime (&t0); if (!tm) return perror ("localtime"), 1; size_t nbytes = strftime (strftimebuf, strftimebufsize, "%Z", tm); if (!nbytes) return perror ("strftime"), 1; if (puts (strftimebuf) < 0) return perror ("puts"), 1; if (fclose (stdout) < 0) return perror ("fclose"), 1; return 0; }
On 16/05/23 19:00, Paul Eggert wrote: > On 5/16/23 12:28, Adhemerval Zanella Netto via Libc-alpha wrote: >> Do we have a practical maximum size for the abbreviate timezone name? The >> internal tz_rule 'name' field is just a pointer, but I think all timezones >> uses a maximum name size. > > In current TZDB data the longest abbreviation has four bytes (e.g., "AKST", "NZST"). Before TZDB release 2016g (2016-09-13) the longest was 48 bytes, for the Factory zone's "Local time zone must be set--see zic manual page" abbreviation. > > Although the TZif file format's limit is 2**32 - 1 bytes, the 'zic' command won't process TZif files with abbreviations longer than about 2000 bytes, due to its internal input line buffer. > > There is a limit even if you use long strings in TZ environment variables, as glibc strftime is buggy when a buffer contains more than INT_MAX bytes (it uses 'int' to store byte counts). This means you can't effectively use a time zone abbreviation longer than about 2**31 - 1 bytes with glibc, even on 64-bit platforms. For example, a program like the one shown below won't work with glibc. > > For what it's worth, _POSIX_TZNAME_MAX is 6, that is, every POSIX platform must support at least 6 bytes. Thanks for the information, so do you think it would be feasible to assume _POSIX_TZNAME_MAX or 48 (for Factory zone) to avoid the malloc call on strftime_l? > > #include <stdio.h> > #include <stdlib.h> > #include <string.h> > #include <time.h> > > int > main () > { > size_t Xs = (1LL << 32) + 1; > char *tzbuf = malloc (sizeof "TZ=" - 1 + Xs + sizeof "0"); > if (!tzbuf) > return perror ("malloc tzbuf"), 1; > size_t strftimebufsize = Xs + 2; > char *strftimebuf = malloc (strftimebufsize); > if (!strftimebuf) > return perror ("malloc strftimebuf"), 1; > strcpy (tzbuf, "TZ="); > memset (tzbuf + 3, 'X', Xs); > strcpy (tzbuf + 3 + Xs, "0"); > if (putenv (tzbuf)) > return perror ("putenv"), 1; > time_t t0 = 0; > struct tm *tm = localtime (&t0); > if (!tm) > return perror ("localtime"), 1; > size_t nbytes = strftime (strftimebuf, strftimebufsize, "%Z", tm); > if (!nbytes) > return perror ("strftime"), 1; > if (puts (strftimebuf) < 0) > return perror ("puts"), 1; > if (fclose (stdout) < 0) > return perror ("fclose"), 1; > return 0; > } >
On 2023-05-17 04:04, Adhemerval Zanella Netto wrote: > do you think it would be feasible to assume > _POSIX_TZNAME_MAX or 48 (for Factory zone) to avoid the malloc call on > strftime_l? ALthough it's technically feasible, it'd be some work. Among other things, whatever limit we chose would have to become visible via <limits.h> and sysconf and etc., and we'd need to document that there is now a limit. Better, I think, would be to continue to follow the GNU coding standards and not impose an arbitrary limit on time zone abbreviation length, even for the highly unusual case where code is calling wcsftime or wcsftime_l. How about avoiding the malloc in the following way? I installed the attached patch into Gnulib lib/nstrftime.c, which has forked from glibc but with some work could be merged back in, and this should also fix the glibc bugs with buffer sizes exceeding INT_MAX. If you don't want all the hassle of merging, you can cherry-pick this patch. I haven't tested the patch, though, as Gnulib does not use this code.
On Wed, May 17, 2023 at 10:40:05AM -0700, Paul Eggert wrote: > On 2023-05-17 04:04, Adhemerval Zanella Netto wrote: > > do you think it would be feasible to assume > > _POSIX_TZNAME_MAX or 48 (for Factory zone) to avoid the malloc call on > > strftime_l? > > ALthough it's technically feasible, it'd be some work. Among other things, > whatever limit we chose would have to become visible via <limits.h> and > sysconf and etc., and we'd need to document that there is now a limit. > Better, I think, would be to continue to follow the GNU coding standards and > not impose an arbitrary limit on time zone abbreviation length, even for the > highly unusual case where code is calling wcsftime or wcsftime_l. > > How about avoiding the malloc in the following way? I installed the attached > patch into Gnulib lib/nstrftime.c, which has forked from glibc but with some > work could be merged back in, and this should also fix the glibc bugs with > buffer sizes exceeding INT_MAX. > > If you don't want all the hassle of merging, you can cherry-pick this patch. > I haven't tested the patch, though, as Gnulib does not use this code. I've pushed your suggested patch after testing on x86_64 and i686. [1] Thanks, Joe [1] https://sourceware.org/pipermail/libc-alpha/2023-May/148384.html > From 197eec6075bbaf2b97137115a09a6bbce43b4dd4 Mon Sep 17 00:00:00 2001 > From: Paul Eggert <eggert@cs.ucla.edu> > Date: Wed, 17 May 2023 10:27:40 -0700 > Subject: [PATCH] nstrftime: suggest to glibc how to avoid alloca > > * lib/nstrftime.c (widen) [COMPILE_WIDE]: Remove. > (__strftime_internal) [COMPILE_WIDE): Instead of converting the > multibyte time zone abbreviation into a potentially unbounded > alloca buffer, convert it directly into the output buffer. > Although this code is not used in Gnulib, this can help the glibc > developers avoid the problem on the glibc side. > --- > ChangeLog | 10 ++++++++++ > lib/nstrftime.c | 39 +++++++++++++++++++++++++-------------- > 2 files changed, 35 insertions(+), 14 deletions(-) > > diff --git a/ChangeLog b/ChangeLog > index ecbc25ef06..36b3c65b81 100644 > --- a/ChangeLog > +++ b/ChangeLog > @@ -1,3 +1,13 @@ > +2023-05-17 Paul Eggert <eggert@cs.ucla.edu> > + > + nstrftime: suggest to glibc how to avoid alloca > + * lib/nstrftime.c (widen) [COMPILE_WIDE]: Remove. > + (__strftime_internal) [COMPILE_WIDE): Instead of converting the > + multibyte time zone abbreviation into a potentially unbounded > + alloca buffer, convert it directly into the output buffer. > + Although this code is not used in Gnulib, this can help the glibc > + developers avoid the problem on the glibc side. > + > 2023-05-15 Bruno Haible <bruno@clisp.org> > > doc: New chapter "Strings and Characters". > diff --git a/lib/nstrftime.c b/lib/nstrftime.c > index 68bb560910..35a9307e1a 100644 > --- a/lib/nstrftime.c > +++ b/lib/nstrftime.c > @@ -226,15 +226,6 @@ extern char *tzname[]; > # undef __mbsrtowcs_l > # define __mbsrtowcs_l(d, s, l, st, loc) __mbsrtowcs (d, s, l, st) > # endif > -# define widen(os, ws, l) \ > - { \ > - mbstate_t __st; \ > - const char *__s = os; \ > - memset (&__st, '\0', sizeof (__st)); \ > - l = __mbsrtowcs_l (NULL, &__s, 0, &__st, loc); \ > - ws = (wchar_t *) alloca ((l + 1) * sizeof (wchar_t)); \ > - (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc); \ > - } > #endif > > > @@ -1374,11 +1365,31 @@ __strftime_internal (STREAM_OR_CHAR_T *s, STRFTIME_ARG (size_t maxsize) > #ifdef COMPILE_WIDE > { > /* The zone string is always given in multibyte form. We have > - to transform it first. */ > - wchar_t *wczone; > - size_t len; > - widen (zone, wczone, len); > - cpy (len, wczone); > + to convert it to wide character. */ > + size_t w = pad == L_('-') || width < 0 ? 0 : width; > + char const *z = zone; > + mbstate_t st = {0}; > + size_t len = __mbsrtowcs_l (p, &z, maxsize - i, &st, loc); > + if (len == (size_t) -1) > + return 0; > + size_t incr = len < w ? w : len; > + if (incr >= maxsize - i) > + { > + errno = ERANGE; > + return 0; > + } > + if (p) > + { > + if (len < w) > + { > + size_t delta = w - len; > + wmemmove (p + delta, p, len); > + wchar_t wc = pad == L_('0') || pad == L_('+') ? L'0' : L' '; > + wmemset (p, wc, delta); > + } > + p += incr; > + } > + i += incr; > } > #else > cpy (strlen (zone), zone); > -- > 2.39.2 >
diff --git a/time/strftime_l.c b/time/strftime_l.c index 402c6c4111..59d3e1a3b2 100644 --- a/time/strftime_l.c +++ b/time/strftime_l.c @@ -273,8 +273,9 @@ static const CHAR_T zeroes[16] = /* "0000000000000000" */ const char *__s = os; \ memset (&__st, '\0', sizeof (__st)); \ l = __mbsrtowcs_l (NULL, &__s, 0, &__st, loc); \ - ws = alloca ((l + 1) * sizeof (wchar_t)); \ - (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc); \ + ws = malloc ((l + 1) * sizeof (wchar_t)); \ + if (ws != NULL) \ + (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc); \ } #endif @@ -1346,7 +1347,10 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format, wchar_t *wczone; size_t len; widen (zone, wczone, len); + if (wczone == NULL) + return 0; cpy (len, wczone); + free (wczone); } #else cpy (strlen (zone), zone);