Message ID | 20230627180556.728512-1-josimmon@redhat.com |
---|---|
State | New |
Headers | show |
Series | vfscanf-internal: Get rid of unbounded allocas | expand |
On 27/06/23 15:05, Joe Simmons-Talbott via Libc-alpha wrote: > Replace two unbounded allocas with scratch_buffers to avoid potential > stack overflow. > --- > stdio-common/vfscanf-internal.c | 26 +++++++++++++++++++++++--- > 1 file changed, 23 insertions(+), 3 deletions(-) > > diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c > index bfb9baa21a..60376ccff9 100644 > --- a/stdio-common/vfscanf-internal.c > +++ b/stdio-common/vfscanf-internal.c > @@ -335,6 +335,12 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, > struct char_buffer charbuf; > scratch_buffer_init (&charbuf.scratch); > > + struct scratch_buffer wc_ext_sbuf; > + scratch_buffer_init (&wc_ext_sbuf); > + > + struct scratch_buffer mb_ext_sbuf; > + scratch_buffer_init (&mb_ext_sbuf); > + > #ifdef __va_copy > __va_copy (arg, argptr); > #else > @@ -1488,8 +1494,14 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, > wcdigits[n] = (const wchar_t *) > _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n); > > - wchar_t *wc_extended = (wchar_t *) > - alloca ((to_level + 2) * sizeof (wchar_t)); > + wchar_t *wc_extended; > + if (!scratch_buffer_set_array_size > + (&wc_ext_sbuf, sizeof (wchar_t), to_level + 2)) > + { > + done = EOF; > + goto errout; > + } > + wc_extended = wc_ext_sbuf.data; > __wmemcpy (wc_extended, wcdigits[n], to_level); > wc_extended[to_level] = __towctrans (L'0' + n, map); > wc_extended[to_level + 1] = '\0'; > @@ -1525,7 +1537,13 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, > > /* Allocate memory for extended multibyte digit. */ > char *mb_extended; > - mb_extended = (char *) alloca (mbdigits_len + mblen + 1); > + if (!scratch_buffer_set_array_size > + (&mb_ext_sbuf, 1, mbdigits_len + mblen + 1)) > + { > + done = EOF; > + goto errout; > + } > + mb_extended = mb_ext_sbuf.data; Unfortunately this change is wrong, it will overwrite the wcdigits[n]/mbdigits[n] on each iteration since you set the value of the stratch_buffer without adjusting the offset for each entry (for the wide operation). This code path will just be triggered by a handful of locale, but you can check with the example: --- #include <assert.h> #include <stdio.h> #include <locale.h> #define array_length(var) \ (sizeof (var) / sizeof ((var)[0]) \ + 0 * sizeof (struct { \ _Static_assert (!__builtin_types_compatible_p \ (__typeof (var), __typeof (&(var)[0])), \ "argument must be an array"); \ })) static const struct { int n; const char *str; } inputs[] = { { 1, "\xdb\xb1" }, { 2, "\xdb\xb2" }, { 3, "\xdb\xb3" }, { 4, "\xdb\xb4" }, { 5, "\xdb\xb5" }, { 6, "\xdb\xb6" }, { 7, "\xdb\xb7" }, { 8, "\xdb\xb8" }, { 9, "\xdb\xb9" }, { 10, "\xdb\xb1\xdb\xb0" }, { 11, "\xdb\xb1\xdb\xb1" }, { 12, "\xdb\xb1\xdb\xb2" }, { 13, "\xdb\xb1\xdb\xb3" }, { 14, "\xdb\xb1\xdb\xb4" }, { 15, "\xdb\xb1\xdb\xb5" }, { 16, "\xdb\xb1\xdb\xb6" }, { 17, "\xdb\xb1\xdb\xb7" }, { 18, "\xdb\xb1\xdb\xb8" }, { 19, "\xdb\xb1\xdb\xb9" }, { 20, "\xdb\xb2\xdb\xb0" }, { 30, "\xdb\xb3\xdb\xb0" }, { 40, "\xdb\xb4\xdb\xb0" }, { 50, "\xdb\xb5\xdb\xb0" }, { 60, "\xdb\xb6\xdb\xb0" }, { 70, "\xdb\xb7\xdb\xb0" }, { 80, "\xdb\xb8\xdb\xb0" }, { 90, "\xdb\xb9\xdb\xb0" }, { 100, "\xdb\xb1\xdb\xb0\xdb\xb0" }, { 1000, "\xdb\xb1\xdb\xb0\xdb\xb0\xdb\xb0" }, }; int main (int argc, char *argv[]) { assert (setlocale (LC_ALL, "fa_IR.UTF-8") != NULL); for (int i = 0; i < array_length (inputs); i++) { int n; sscanf (inputs[i].str, "%Id", &n); assert (n == inputs[i].n); } return 0; } --- I generate the inputs by round-trip with printf using the same locale. A similar test for wide char would be: --- #include <assert.h> #include <stdio.h> #include <locale.h> #include <wchar.h> #define array_length(var) \ (sizeof (var) / sizeof ((var)[0]) \ + 0 * sizeof (struct { \ _Static_assert (!__builtin_types_compatible_p \ (__typeof (var), __typeof (&(var)[0])), \ "argument must be an array"); \ })) static const struct input_t { int n; const wchar_t str[5]; } inputs[] = { { 1, { 0x000006f1, L'\0' } }, { 2, { 0x000006f2, L'\0' } }, { 3, { 0x000006f3, L'\0' } }, { 4, { 0x000006f4, L'\0' } }, { 5, { 0x000006f5, L'\0' } }, { 6, { 0x000006f6, L'\0' } }, { 7, { 0x000006f7, L'\0' } }, { 8, { 0x000006f8, L'\0' } }, { 9, { 0x000006f9, L'\0' } }, { 10, { 0x000006f1, 0x000006f0, L'\0' } }, { 11, { 0x000006f1, 0x000006f1, L'\0' } }, { 12, { 0x000006f1, 0x000006f2, L'\0' } }, { 13, { 0x000006f1, 0x000006f3, L'\0' } }, { 14, { 0x000006f1, 0x000006f4, L'\0' } }, { 15, { 0x000006f1, 0x000006f5, L'\0' } }, { 16, { 0x000006f1, 0x000006f6, L'\0' } }, { 17, { 0x000006f1, 0x000006f7, L'\0' } }, { 18, { 0x000006f1, 0x000006f8, L'\0' } }, { 19, { 0x000006f1, 0x000006f9, L'\0' } }, { 20, { 0x000006f2, 0x000006f0, L'\0' } }, { 30, { 0x000006f3, 0x000006f0, L'\0' } }, { 40, { 0x000006f4, 0x000006f0, L'\0' } }, { 50, { 0x000006f5, 0x000006f0, L'\0' } }, { 60, { 0x000006f6, 0x000006f0, L'\0' } }, { 70, { 0x000006f7, 0x000006f0, L'\0' } }, { 80, { 0x000006f8, 0x000006f0, L'\0' } }, { 90, { 0x000006f9, 0x000006f0, L'\0' } }, { 100, { 0x000006f1, 0x000006f0, 0x000006f0, L'\0' } }, { 1000, { 0x000006f1, 0x000006f0, 0x000006f0, 0x000006f0, L'\0' } }, }; int main (int argc, char *argv[]) { assert (setlocale (LC_ALL, "fa_IR.UTF-8") != NULL); for (int i = 0; i < array_length (inputs); i++) { int n = 0; swscanf (inputs[i].str, L"%Id", &n); wprintf (L"n=%d input[%d].n=%d (%ls)\n", n, i, inputs[i].n, inputs[i].str); assert (n == inputs[i].n); } return 0; }; --- Also, you do not need two scratch buffer, since either one will be used for !COMPILE_WSCANF and COMPILE_WSCANF; and to avoid the extra stack requirement (since there are not used for the most locales) only define them if I18N is actually used. The alternate digit handling should be something like: if (base == 10 && __builtin_expect ((flags & I18N) != 0, 0)) { [...] struct scratch_buffer mb_ext_sbuf; scratch_buffer_init (&mb_ext_sbuf); uintptr_t mb_ext_sbuf_off = 0; [...] /* Get the alternative digit forms if there are any. */ if (__glibc_unlikely (map != NULL)) { /* Adding new level for extra digits set in locale file. */ ++to_level; for (n = 0; n < 10; ++n) { size_t mb_ext_sbuf_len; #ifdef COMPILE_WSCANF wcdigits[n] = (const wchar_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n); mb_ext_sbuf_len = (to_level + 2) * sizeof (wchar_t); #else mbdigits[n] = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string; /* Get the equivalent wide char in map. */ wint_t extra_wcdigit = __towctrans (L'0' + n, map); /* Convert it to multibyte representation. */ mbstate_t state; memset (&state, '\0', sizeof (state)); char extra_mbdigit[MB_LEN_MAX]; size_t mblen = __wcrtomb (extra_mbdigit, extra_wcdigit, &state); if (mblen == (size_t) -1) { /* Ignore this new level. */ map = NULL; break; } /* Calculate the length of mbdigits[n]. */ const char *last_char = mbdigits[n]; for (level = 0; level < to_level; ++level) last_char = strchr (last_char, '\0') + 1; size_t mbdigits_len = last_char - mbdigits[n]; mb_ext_sbuf_len = mbdigits_len + mblen + 1; #endif /* Allocate memory for extended multibyte digit. */ size_t mb_ext_sbuf_total = mb_ext_sbuf_off + mb_ext_sbuf_len; if (!scratch_buffer_set_array_size (&wc_ext_sbuf, 1, mb_ext_sbuf_total)) { scratch_buffer_free (&mb_ext_sbuf); done = EOF; goto errout; } #ifdef COMPILE_WSCANF wchar_t *wc_extended = wc_ext_sbuf.data + mb_ext_sbuf_off; mb_ext_sbuf_off += mb_ext_sbuf_len; __wmemcpy (wc_extended, wcdigits[n], to_level); wc_extended[to_level] = __towctrans (L'0' + n, map); wc_extended[to_level + 1] = '\0'; wcdigits_extended[n] = wc_extended; #else char *mb_extended = mb_ext_sbuf.data + mb_ext_sbuf_off; mb_ext_sbuf_off += mb_ext_sbuf_len; /* And get the mbdigits + extra_digit string. */ *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n], mbdigits_len), extra_mbdigit, mblen) = '\0'; mbdigits_extended[n] = mb_extended; #endif } } So I would suggest to add proper tests (you may use the examples above) for this change. > > /* And get the mbdigits + extra_digit string. */ > *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],> @@ -3063,5 +3081,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, > free (*strptr); > *strptr = NULL; > } > + scratch_buffer_free (&wc_ext_sbuf); > + scratch_buffer_free (&mb_ext_sbuf); > return done; > }
On 30/06/23 15:14, Adhemerval Zanella Netto wrote: > > > On 27/06/23 15:05, Joe Simmons-Talbott via Libc-alpha wrote: >> Replace two unbounded allocas with scratch_buffers to avoid potential >> stack overflow. >> --- >> stdio-common/vfscanf-internal.c | 26 +++++++++++++++++++++++--- >> 1 file changed, 23 insertions(+), 3 deletions(-) >> >> diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c >> index bfb9baa21a..60376ccff9 100644 >> --- a/stdio-common/vfscanf-internal.c >> +++ b/stdio-common/vfscanf-internal.c >> @@ -335,6 +335,12 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, >> struct char_buffer charbuf; >> scratch_buffer_init (&charbuf.scratch); >> >> + struct scratch_buffer wc_ext_sbuf; >> + scratch_buffer_init (&wc_ext_sbuf); >> + >> + struct scratch_buffer mb_ext_sbuf; >> + scratch_buffer_init (&mb_ext_sbuf); >> + >> #ifdef __va_copy >> __va_copy (arg, argptr); >> #else >> @@ -1488,8 +1494,14 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, >> wcdigits[n] = (const wchar_t *) >> _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n); >> >> - wchar_t *wc_extended = (wchar_t *) >> - alloca ((to_level + 2) * sizeof (wchar_t)); >> + wchar_t *wc_extended; >> + if (!scratch_buffer_set_array_size >> + (&wc_ext_sbuf, sizeof (wchar_t), to_level + 2)) >> + { >> + done = EOF; >> + goto errout; >> + } >> + wc_extended = wc_ext_sbuf.data; >> __wmemcpy (wc_extended, wcdigits[n], to_level); >> wc_extended[to_level] = __towctrans (L'0' + n, map); >> wc_extended[to_level + 1] = '\0'; >> @@ -1525,7 +1537,13 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, >> >> /* Allocate memory for extended multibyte digit. */ >> char *mb_extended; >> - mb_extended = (char *) alloca (mbdigits_len + mblen + 1); >> + if (!scratch_buffer_set_array_size >> + (&mb_ext_sbuf, 1, mbdigits_len + mblen + 1)) >> + { >> + done = EOF; >> + goto errout; >> + } >> + mb_extended = mb_ext_sbuf.data; > > Unfortunately this change is wrong, it will overwrite the wcdigits[n]/mbdigits[n] > on each iteration since you set the value of the stratch_buffer without adjusting > the offset for each entry (for the wide operation). > > This code path will just be triggered by a handful of locale, but you can check > with the example: > > --- > #include <assert.h> > #include <stdio.h> > #include <locale.h> > > #define array_length(var) \ > (sizeof (var) / sizeof ((var)[0]) \ > + 0 * sizeof (struct { \ > _Static_assert (!__builtin_types_compatible_p \ > (__typeof (var), __typeof (&(var)[0])), \ > "argument must be an array"); \ > })) > > static const struct > { > int n; > const char *str; > } inputs[] = > { > { 1, "\xdb\xb1" }, > { 2, "\xdb\xb2" }, > { 3, "\xdb\xb3" }, > { 4, "\xdb\xb4" }, > { 5, "\xdb\xb5" }, > { 6, "\xdb\xb6" }, > { 7, "\xdb\xb7" }, > { 8, "\xdb\xb8" }, > { 9, "\xdb\xb9" }, > { 10, "\xdb\xb1\xdb\xb0" }, > { 11, "\xdb\xb1\xdb\xb1" }, > { 12, "\xdb\xb1\xdb\xb2" }, > { 13, "\xdb\xb1\xdb\xb3" }, > { 14, "\xdb\xb1\xdb\xb4" }, > { 15, "\xdb\xb1\xdb\xb5" }, > { 16, "\xdb\xb1\xdb\xb6" }, > { 17, "\xdb\xb1\xdb\xb7" }, > { 18, "\xdb\xb1\xdb\xb8" }, > { 19, "\xdb\xb1\xdb\xb9" }, > { 20, "\xdb\xb2\xdb\xb0" }, > { 30, "\xdb\xb3\xdb\xb0" }, > { 40, "\xdb\xb4\xdb\xb0" }, > { 50, "\xdb\xb5\xdb\xb0" }, > { 60, "\xdb\xb6\xdb\xb0" }, > { 70, "\xdb\xb7\xdb\xb0" }, > { 80, "\xdb\xb8\xdb\xb0" }, > { 90, "\xdb\xb9\xdb\xb0" }, > { 100, "\xdb\xb1\xdb\xb0\xdb\xb0" }, > { 1000, "\xdb\xb1\xdb\xb0\xdb\xb0\xdb\xb0" }, > }; > > int main (int argc, char *argv[]) > { > assert (setlocale (LC_ALL, "fa_IR.UTF-8") != NULL); > > for (int i = 0; i < array_length (inputs); i++) > { > int n; > sscanf (inputs[i].str, "%Id", &n); > assert (n == inputs[i].n); > } > > return 0; > } > --- > > I generate the inputs by round-trip with printf using the same locale. A similar test > for wide char would be: > > --- > #include <assert.h> > #include <stdio.h> > #include <locale.h> > #include <wchar.h> > > #define array_length(var) \ > (sizeof (var) / sizeof ((var)[0]) \ > + 0 * sizeof (struct { \ > _Static_assert (!__builtin_types_compatible_p \ > (__typeof (var), __typeof (&(var)[0])), \ > "argument must be an array"); \ > })) > > static const struct input_t > { > int n; > const wchar_t str[5]; > } inputs[] = > { > { 1, { 0x000006f1, L'\0' } }, > { 2, { 0x000006f2, L'\0' } }, > { 3, { 0x000006f3, L'\0' } }, > { 4, { 0x000006f4, L'\0' } }, > { 5, { 0x000006f5, L'\0' } }, > { 6, { 0x000006f6, L'\0' } }, > { 7, { 0x000006f7, L'\0' } }, > { 8, { 0x000006f8, L'\0' } }, > { 9, { 0x000006f9, L'\0' } }, > { 10, { 0x000006f1, 0x000006f0, L'\0' } }, > { 11, { 0x000006f1, 0x000006f1, L'\0' } }, > { 12, { 0x000006f1, 0x000006f2, L'\0' } }, > { 13, { 0x000006f1, 0x000006f3, L'\0' } }, > { 14, { 0x000006f1, 0x000006f4, L'\0' } }, > { 15, { 0x000006f1, 0x000006f5, L'\0' } }, > { 16, { 0x000006f1, 0x000006f6, L'\0' } }, > { 17, { 0x000006f1, 0x000006f7, L'\0' } }, > { 18, { 0x000006f1, 0x000006f8, L'\0' } }, > { 19, { 0x000006f1, 0x000006f9, L'\0' } }, > { 20, { 0x000006f2, 0x000006f0, L'\0' } }, > { 30, { 0x000006f3, 0x000006f0, L'\0' } }, > { 40, { 0x000006f4, 0x000006f0, L'\0' } }, > { 50, { 0x000006f5, 0x000006f0, L'\0' } }, > { 60, { 0x000006f6, 0x000006f0, L'\0' } }, > { 70, { 0x000006f7, 0x000006f0, L'\0' } }, > { 80, { 0x000006f8, 0x000006f0, L'\0' } }, > { 90, { 0x000006f9, 0x000006f0, L'\0' } }, > { 100, { 0x000006f1, 0x000006f0, 0x000006f0, L'\0' } }, > { 1000, { 0x000006f1, 0x000006f0, 0x000006f0, 0x000006f0, L'\0' } }, > }; > > int main (int argc, char *argv[]) > { > assert (setlocale (LC_ALL, "fa_IR.UTF-8") != NULL); > > for (int i = 0; i < array_length (inputs); i++) > { > int n = 0; > swscanf (inputs[i].str, L"%Id", &n); > wprintf (L"n=%d input[%d].n=%d (%ls)\n", > n, > i, > inputs[i].n, > inputs[i].str); > assert (n == inputs[i].n); > } > > return 0; > }; > --- > > Also, you do not need two scratch buffer, since either one will be used for > !COMPILE_WSCANF and COMPILE_WSCANF; and to avoid the extra stack requirement > (since there are not used for the most locales) only define them if I18N > is actually used. The alternate digit handling should be something like: Scratch that, I just realized that once we reallocate the scratch_buffer the digits_extended pointers will be invalidated anyway. I think a better strategy would be just use malloc here, it will slow down the conversion a bit but since the size will small it will be fulfilled by the thread cache anyway. I will send a fix with some testcases.
diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c index bfb9baa21a..60376ccff9 100644 --- a/stdio-common/vfscanf-internal.c +++ b/stdio-common/vfscanf-internal.c @@ -335,6 +335,12 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, struct char_buffer charbuf; scratch_buffer_init (&charbuf.scratch); + struct scratch_buffer wc_ext_sbuf; + scratch_buffer_init (&wc_ext_sbuf); + + struct scratch_buffer mb_ext_sbuf; + scratch_buffer_init (&mb_ext_sbuf); + #ifdef __va_copy __va_copy (arg, argptr); #else @@ -1488,8 +1494,14 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, wcdigits[n] = (const wchar_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n); - wchar_t *wc_extended = (wchar_t *) - alloca ((to_level + 2) * sizeof (wchar_t)); + wchar_t *wc_extended; + if (!scratch_buffer_set_array_size + (&wc_ext_sbuf, sizeof (wchar_t), to_level + 2)) + { + done = EOF; + goto errout; + } + wc_extended = wc_ext_sbuf.data; __wmemcpy (wc_extended, wcdigits[n], to_level); wc_extended[to_level] = __towctrans (L'0' + n, map); wc_extended[to_level + 1] = '\0'; @@ -1525,7 +1537,13 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, /* Allocate memory for extended multibyte digit. */ char *mb_extended; - mb_extended = (char *) alloca (mbdigits_len + mblen + 1); + if (!scratch_buffer_set_array_size + (&mb_ext_sbuf, 1, mbdigits_len + mblen + 1)) + { + done = EOF; + goto errout; + } + mb_extended = mb_ext_sbuf.data; /* And get the mbdigits + extra_digit string. */ *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n], @@ -3063,5 +3081,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr, free (*strptr); *strptr = NULL; } + scratch_buffer_free (&wc_ext_sbuf); + scratch_buffer_free (&mb_ext_sbuf); return done; }