Message ID | 20241108171450.411932-3-siddhesh@sourceware.org |
---|---|
State | New |
Headers | show |
Series | Guarantee first pushback in ungetc and ungetwc | expand |
* Siddhesh Poyarekar: > The C standard requires that ungetc guarantees at least one pushback, so > put a single byte pushback buffer in the FILE struct to enable that. > > Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> > --- > libio/bits/types/struct_FILE.h | 3 +- > libio/fileops.c | 21 +++----- > libio/genops.c | 27 +++++----- > stdio-common/Makefile | 1 + > stdio-common/tst-ungetc-nomem.c | 94 +++++++++++++++++++++++++++++++++ > 5 files changed, 118 insertions(+), 28 deletions(-) > create mode 100644 stdio-common/tst-ungetc-nomem.c > > diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h > index d8d26639d1..6b5295fce5 100644 > --- a/libio/bits/types/struct_FILE.h > +++ b/libio/bits/types/struct_FILE.h > @@ -94,8 +94,9 @@ struct _IO_FILE_complete > void *_freeres_buf; > struct _IO_FILE **_prevchain; > int _mode; > + char _short_backupbuf[1]; > /* Make sure we don't get into trouble again. */ > - char _unused2[15 * sizeof (int) - 5 * sizeof (void *)]; > + char _unused2[15 * sizeof (int) - 5 * sizeof (void *) - sizeof (char)]; > }; There's unused space after _shortbuf (even on m68k), so please use that instead. It avoids issues with legacy streams not having the extended part. It avoids a conflict with a patch Tulio is working on, too. > /* These macros are used by bits/stdio.h and internal headers. */ > diff --git a/libio/fileops.c b/libio/fileops.c > index 4db4a76f75..67e57e0d8c 100644 > --- a/libio/fileops.c > +++ b/libio/fileops.c > @@ -478,11 +478,8 @@ _IO_new_file_underflow (FILE *fp) > if (fp->_IO_buf_base == NULL) > { > /* Maybe we already have a push back pointer. */ > - if (fp->_IO_save_base != NULL) > - { > - free (fp->_IO_save_base); > - fp->_flags &= ~_IO_IN_BACKUP; > - } > + if (_IO_have_backup (fp)) > + _IO_free_backup_area (fp); > _IO_doallocbuf (fp); > } So … why is it okay to call _IO_free_backup_area here, given the many side effects it has? I suggest to rename free_backup_buf to _IO_free_backup_buf, make it extern + attribute_hidden, and only replace the relevant free calls. > diff --git a/libio/genops.c b/libio/genops.c > index 6545a78ad5..02b38fbc9a 100644 > --- a/libio/genops.c > +++ b/libio/genops.c > @@ -48,6 +48,13 @@ flush_cleanup (void *not_used) > } > #endif > > +static void > +free_backup_buf (FILE *fp, char *ptr) > +{ > + if (fp->_short_backupbuf != ptr) > + free (ptr); > +} I'd prefer if ptr comes first, as the scrutinee. > @@ -634,7 +641,7 @@ _IO_default_finish (FILE *fp, int dummy) > for (mark = fp->_markers; mark != NULL; mark = mark->_next) > mark->_sbuf = NULL; > > - if (fp->_IO_save_base) > + if (fp->_IO_save_base && fp->_IO_save_base != fp->_short_backupbuf) > { > free (fp->_IO_save_base); > fp->_IO_save_base = NULL; This skips the NULL assignment if _IO_save_base is _short_backupbuf. That doesn't look right. > else if (!_IO_have_backup (fp)) > { > - /* No backup buffer: allocate one. */ > - /* Use nshort buffer, if unused? (probably not) FIXME */ > - int backup_size = 128; > - char *bbuf = (char *) malloc (backup_size); > - if (bbuf == NULL) > - return EOF; > - fp->_IO_save_base = bbuf; > - fp->_IO_save_end = fp->_IO_save_base + backup_size; > + /* We need to guarantee one pushback, so start with the built-in > + 1-char buffer. */ > + fp->_IO_save_base = fp->_short_backupbuf; > + fp->_IO_save_end = fp->_IO_save_base + 1; > fp->_IO_backup_base = fp->_IO_save_end; > } Please use the 1-character buffer only as a fallback, to avoid changing the behavior if malloc does not fail. > diff --git a/stdio-common/tst-ungetc-nomem.c b/stdio-common/tst-ungetc-nomem.c > new file mode 100644 > index 0000000000..bf36229a9c > --- /dev/null > +++ b/stdio-common/tst-ungetc-nomem.c > +static bool fail = false; Should be volatile, to inhibit compiler optimizations around malloc. > + > +void * > +malloc (size_t sz) > +{ > + if (fail) > + return NULL; > + > + return __libc_malloc (sz); > +} > + > +static int > +do_test (void) > +{ > + char *filename = NULL; > + struct stat props = {}; > + size_t bufsz = 0; > + > + create_temp_file ("tst-ungetc-nomem.", &filename); > + if (stat (filename, &props) != 0) > + FAIL_EXIT1 ("Could not get file status: %m\n"); > + > + FILE *fp = fopen (filename, "w"); > + > + /* The libio buffer sizes are the same as block size. */ > + bufsz = props.st_blksize + 2; > + > + char *buf = xmalloc (bufsz); > + memset (buf, 'a', bufsz); > + > + size_t remaining = bufsz; > + while (remaining > 0) > + { > + size_t done = fwrite (buf, sizeof (char), remaining, fp); > + if (done == 0) > + break; > + remaining -= done; > + } Why the retry loop? I don't think fwrite works this way. > + fclose (fp); xfclose? > + /* Begin test. */ > + fp = fopen (filename, "r"); xfopen? > + > + > + /* The standard requires the first ungetc to always work. */ > + fail = true; > + TEST_COMPARE (ungetc('y', fp), 'y'); Missing space after ungetc. > + /* Now let the buffers get allocated to allow for subsequent tests. */ > + fail = false; > + TEST_COMPARE (fgetc (fp), 'y'); > + TEST_COMPARE (ungetc('y', fp), 'y'); > + TEST_COMPARE (fgetc (fp), 'y'); This doesn't exercise the case where we switch from a 1-byte buffer to a larger buffer. Thanks, Florian
On 2024-11-28 08:45, Florian Weimer wrote: > * Siddhesh Poyarekar: > >> The C standard requires that ungetc guarantees at least one pushback, so >> put a single byte pushback buffer in the FILE struct to enable that. >> >> Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> >> --- >> libio/bits/types/struct_FILE.h | 3 +- >> libio/fileops.c | 21 +++----- >> libio/genops.c | 27 +++++----- >> stdio-common/Makefile | 1 + >> stdio-common/tst-ungetc-nomem.c | 94 +++++++++++++++++++++++++++++++++ >> 5 files changed, 118 insertions(+), 28 deletions(-) >> create mode 100644 stdio-common/tst-ungetc-nomem.c >> >> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h >> index d8d26639d1..6b5295fce5 100644 >> --- a/libio/bits/types/struct_FILE.h >> +++ b/libio/bits/types/struct_FILE.h >> @@ -94,8 +94,9 @@ struct _IO_FILE_complete >> void *_freeres_buf; >> struct _IO_FILE **_prevchain; >> int _mode; >> + char _short_backupbuf[1]; >> /* Make sure we don't get into trouble again. */ >> - char _unused2[15 * sizeof (int) - 5 * sizeof (void *)]; >> + char _unused2[15 * sizeof (int) - 5 * sizeof (void *) - sizeof (char)]; >> }; > > There's unused space after _shortbuf (even on m68k), so please use that > instead. It avoids issues with legacy streams not having the extended > part. OK. > It avoids a conflict with a patch Tulio is working on, too. > >> /* These macros are used by bits/stdio.h and internal headers. */ >> diff --git a/libio/fileops.c b/libio/fileops.c >> index 4db4a76f75..67e57e0d8c 100644 >> --- a/libio/fileops.c >> +++ b/libio/fileops.c >> @@ -478,11 +478,8 @@ _IO_new_file_underflow (FILE *fp) >> if (fp->_IO_buf_base == NULL) >> { >> /* Maybe we already have a push back pointer. */ >> - if (fp->_IO_save_base != NULL) >> - { >> - free (fp->_IO_save_base); >> - fp->_flags &= ~_IO_IN_BACKUP; >> - } >> + if (_IO_have_backup (fp)) >> + _IO_free_backup_area (fp); >> _IO_doallocbuf (fp); >> } > > So … why is it okay to call _IO_free_backup_area here, given the many > side effects it has? > > I suggest to rename free_backup_buf to _IO_free_backup_buf, make it > extern + attribute_hidden, and only replace the relevant free calls. OK, that's what I did first, but it seemed more coherent to always fully restore state from backup to main area instead. >> diff --git a/libio/genops.c b/libio/genops.c >> index 6545a78ad5..02b38fbc9a 100644 >> --- a/libio/genops.c >> +++ b/libio/genops.c >> @@ -48,6 +48,13 @@ flush_cleanup (void *not_used) >> } >> #endif >> >> +static void >> +free_backup_buf (FILE *fp, char *ptr) >> +{ >> + if (fp->_short_backupbuf != ptr) >> + free (ptr); >> +} > > I'd prefer if ptr comes first, as the scrutinee. OK. > >> @@ -634,7 +641,7 @@ _IO_default_finish (FILE *fp, int dummy) >> for (mark = fp->_markers; mark != NULL; mark = mark->_next) >> mark->_sbuf = NULL; >> >> - if (fp->_IO_save_base) >> + if (fp->_IO_save_base && fp->_IO_save_base != fp->_short_backupbuf) >> { >> free (fp->_IO_save_base); >> fp->_IO_save_base = NULL; > > This skips the NULL assignment if _IO_save_base is _short_backupbuf. > That doesn't look right. Uhmm, yeah. Fixed. >> else if (!_IO_have_backup (fp)) >> { >> - /* No backup buffer: allocate one. */ >> - /* Use nshort buffer, if unused? (probably not) FIXME */ >> - int backup_size = 128; >> - char *bbuf = (char *) malloc (backup_size); >> - if (bbuf == NULL) >> - return EOF; >> - fp->_IO_save_base = bbuf; >> - fp->_IO_save_end = fp->_IO_save_base + backup_size; >> + /* We need to guarantee one pushback, so start with the built-in >> + 1-char buffer. */ >> + fp->_IO_save_base = fp->_short_backupbuf; >> + fp->_IO_save_end = fp->_IO_save_base + 1; >> fp->_IO_backup_base = fp->_IO_save_end; >> } > > Please use the 1-character buffer only as a fallback, to avoid changing > the behavior if malloc does not fail. But doesn't this become a micro-optimization too for 1-char pushbacks? I'll admit I don't know if that's the predominant case, but it seems like it should be a pretty common one. >> diff --git a/stdio-common/tst-ungetc-nomem.c b/stdio-common/tst-ungetc-nomem.c >> new file mode 100644 >> index 0000000000..bf36229a9c >> --- /dev/null >> +++ b/stdio-common/tst-ungetc-nomem.c > >> +static bool fail = false; > > Should be volatile, to inhibit compiler optimizations around malloc. OK. >> + >> +void * >> +malloc (size_t sz) >> +{ >> + if (fail) >> + return NULL; >> + >> + return __libc_malloc (sz); >> +} >> + >> +static int >> +do_test (void) >> +{ >> + char *filename = NULL; >> + struct stat props = {}; >> + size_t bufsz = 0; >> + >> + create_temp_file ("tst-ungetc-nomem.", &filename); >> + if (stat (filename, &props) != 0) >> + FAIL_EXIT1 ("Could not get file status: %m\n"); >> + >> + FILE *fp = fopen (filename, "w"); >> + >> + /* The libio buffer sizes are the same as block size. */ >> + bufsz = props.st_blksize + 2; >> + >> + char *buf = xmalloc (bufsz); >> + memset (buf, 'a', bufsz); >> + >> + size_t remaining = bufsz; >> + while (remaining > 0) >> + { >> + size_t done = fwrite (buf, sizeof (char), remaining, fp); >> + if (done == 0) >> + break; >> + remaining -= done; >> + } > > Why the retry loop? I don't think fwrite works this way. Eek, right, sorry. >> + fclose (fp); > > xfclose? > >> + /* Begin test. */ >> + fp = fopen (filename, "r"); > > xfopen? OK. > >> + >> + >> + /* The standard requires the first ungetc to always work. */ >> + fail = true; >> + TEST_COMPARE (ungetc('y', fp), 'y'); > > Missing space after ungetc. Fixed. > >> + /* Now let the buffers get allocated to allow for subsequent tests. */ >> + fail = false; >> + TEST_COMPARE (fgetc (fp), 'y'); >> + TEST_COMPARE (ungetc('y', fp), 'y'); >> + TEST_COMPARE (fgetc (fp), 'y'); > > This doesn't exercise the case where we switch from a 1-byte buffer to a > larger buffer. OK, I'll add this. Thanks, Sid
* Siddhesh Poyarekar: >> So … why is it okay to call _IO_free_backup_area here, given the many >> side effects it has? I suggest to rename free_backup_buf to >> _IO_free_backup_buf, make it extern + attribute_hidden, and only >> replace the relevant free calls. > > OK, that's what I did first, but it seemed more coherent to always > fully restore state from backup to main area instead. Maybe, but this is libio, so I think we should minimize changes. >>> else if (!_IO_have_backup (fp)) >>> { >>> - /* No backup buffer: allocate one. */ >>> - /* Use nshort buffer, if unused? (probably not) FIXME */ >>> - int backup_size = 128; >>> - char *bbuf = (char *) malloc (backup_size); >>> - if (bbuf == NULL) >>> - return EOF; >>> - fp->_IO_save_base = bbuf; >>> - fp->_IO_save_end = fp->_IO_save_base + backup_size; >>> + /* We need to guarantee one pushback, so start with the built-in >>> + 1-char buffer. */ >>> + fp->_IO_save_base = fp->_short_backupbuf; >>> + fp->_IO_save_end = fp->_IO_save_base + 1; >>> fp->_IO_backup_base = fp->_IO_save_end; >>> } >> Please use the 1-character buffer only as a fallback, to avoid >> changing the behavior if malloc does not fail. > > But doesn't this become a micro-optimization too for 1-char pushbacks? > I'll admit I don't know if that's the predominant case, but it seems > like it should be a pretty common one. We currently have little experience with that reallocation path, and with the 1-character buffer, we suddenly start exercising that quite heavily if there is more than one ungetc call. The optimization could be a follow-up change. Thanks, Florian
On 2024-11-28 12:23, Florian Weimer wrote: > * Siddhesh Poyarekar: > >>> So … why is it okay to call _IO_free_backup_area here, given the many >>> side effects it has? I suggest to rename free_backup_buf to >>> _IO_free_backup_buf, make it extern + attribute_hidden, and only >>> replace the relevant free calls. >> >> OK, that's what I did first, but it seemed more coherent to always >> fully restore state from backup to main area instead. > > Maybe, but this is libio, so I think we should minimize changes. This is libio, we should sneakily rewrite all of it ;) >>>> else if (!_IO_have_backup (fp)) >>>> { >>>> - /* No backup buffer: allocate one. */ >>>> - /* Use nshort buffer, if unused? (probably not) FIXME */ >>>> - int backup_size = 128; >>>> - char *bbuf = (char *) malloc (backup_size); >>>> - if (bbuf == NULL) >>>> - return EOF; >>>> - fp->_IO_save_base = bbuf; >>>> - fp->_IO_save_end = fp->_IO_save_base + backup_size; >>>> + /* We need to guarantee one pushback, so start with the built-in >>>> + 1-char buffer. */ >>>> + fp->_IO_save_base = fp->_short_backupbuf; >>>> + fp->_IO_save_end = fp->_IO_save_base + 1; >>>> fp->_IO_backup_base = fp->_IO_save_end; >>>> } >>> Please use the 1-character buffer only as a fallback, to avoid >>> changing the behavior if malloc does not fail. >> >> But doesn't this become a micro-optimization too for 1-char pushbacks? >> I'll admit I don't know if that's the predominant case, but it seems >> like it should be a pretty common one. > > We currently have little experience with that reallocation path, and > with the 1-character buffer, we suddenly start exercising that quite > heavily if there is more than one ungetc call. The optimization could > be a follow-up change. OK, I'll take the more conservative route with v2 :) Thanks, Sid Note to self: be even more sneaky...
On Thu, 28 Nov 2024, Florian Weimer wrote: > > diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h > > index d8d26639d1..6b5295fce5 100644 > > --- a/libio/bits/types/struct_FILE.h > > +++ b/libio/bits/types/struct_FILE.h > > @@ -94,8 +94,9 @@ struct _IO_FILE_complete > > void *_freeres_buf; > > struct _IO_FILE **_prevchain; > > int _mode; > > + char _short_backupbuf[1]; > > /* Make sure we don't get into trouble again. */ > > - char _unused2[15 * sizeof (int) - 5 * sizeof (void *)]; > > + char _unused2[15 * sizeof (int) - 5 * sizeof (void *) - sizeof (char)]; > > }; > > There's unused space after _shortbuf (even on m68k), so please use that > instead. It avoids issues with legacy streams not having the extended > part. There isn't AFAICT e.g. on o32 MIPS: struct _IO_FILE { int _flags; /* 0 4 */ char * _IO_read_ptr; /* 4 4 */ char * _IO_read_end; /* 8 4 */ char * _IO_read_base; /* 12 4 */ char * _IO_write_base; /* 16 4 */ char * _IO_write_ptr; /* 20 4 */ char * _IO_write_end; /* 24 4 */ char * _IO_buf_base; /* 28 4 */ char * _IO_buf_end; /* 32 4 */ char * _IO_save_base; /* 36 4 */ char * _IO_backup_base; /* 40 4 */ char * _IO_save_end; /* 44 4 */ struct _IO_marker * _markers; /* 48 4 */ struct _IO_FILE * _chain; /* 52 4 */ int _fileno; /* 56 4 */ int _flags2; /* 60 4 */ __off_t _old_offset; /* 64 4 */ short unsigned int _cur_column; /* 68 2 */ signed char _vtable_offset; /* 70 1 */ char _shortbuf[1]; /* 71 1 */ _IO_lock_t * _lock; /* 72 4 */ /* size: 76, cachelines: 1, members: 21 */ /* last cacheline: 76 bytes */ }; The structure is fully packed and there's no padding in the legacy part I'm afraid, and I guess it's the case for the majority of 32-bit targets, which don't imply alignment beyond 4 bytes for pointers. Have I missed anything? We have bits available in _flags2 though, shall we consume 8 of them? It might not end up pretty, but... And we have an established practice of reusing this member. Maciej
* Maciej W. Rozycki: > The structure is fully packed and there's no padding in the legacy part > I'm afraid, and I guess it's the case for the majority of 32-bit targets, > which don't imply alignment beyond 4 bytes for pointers. Have I missed > anything? No, you are right. So the extension area has to be used. I think we should fail ungetc if the malloc call fails and the extension area is unavailable, instead of resorting to heroic bit-stuffing. Thanks, Florian
On 2024-11-29 02:02, Florian Weimer wrote: > * Maciej W. Rozycki: > >> The structure is fully packed and there's no padding in the legacy part >> I'm afraid, and I guess it's the case for the majority of 32-bit targets, >> which don't imply alignment beyond 4 bytes for pointers. Have I missed >> anything? > > No, you are right. So the extension area has to be used. I think we > should fail ungetc if the malloc call fails and the extension area is > unavailable, instead of resorting to heroic bit-stuffing. So basically the original struct layout I had proposed plus this in genops.c should do it? diff --git a/libio/genops.c b/libio/genops.c index d7e35e67d5..5b8bb52ec3 100644 --- a/libio/genops.c +++ b/libio/genops.c @@ -1002,7 +1002,14 @@ _IO_default_pbackfail (FILE *fp, int c) int backup_size = 128; char *bbuf = (char *) malloc (backup_size); if (bbuf == NULL) - return EOF; + { +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1) + if (__glibc_unlikely (_IO_vtable_offset (fp) != 0)) + return EOF; +#endif + bbuf = fp->_short_backupbuf; + backup_size = 1; + } fp->_IO_save_base = bbuf; fp->_IO_save_end = fp->_IO_save_base + backup_size; fp->_IO_backup_base = fp->_IO_save_end;
On Fri, 29 Nov 2024, Florian Weimer wrote: > > The structure is fully packed and there's no padding in the legacy part > > I'm afraid, and I guess it's the case for the majority of 32-bit targets, > > which don't imply alignment beyond 4 bytes for pointers. Have I missed > > anything? > > No, you are right. So the extension area has to be used. I think we > should fail ungetc if the malloc call fails and the extension area is > unavailable, instead of resorting to heroic bit-stuffing. What's wrong with reusing flags2? We have 25 contiguous bits left and at the rate we've been consuming them here we'll need another 40 years before we need the last 8. Besides, we've released a couple already and isn't it internal stuff anyway we can rearrange on a whim? Maciej
On 2024-11-29 07:15, Maciej W. Rozycki wrote: > On Fri, 29 Nov 2024, Florian Weimer wrote: > >>> The structure is fully packed and there's no padding in the legacy part >>> I'm afraid, and I guess it's the case for the majority of 32-bit targets, >>> which don't imply alignment beyond 4 bytes for pointers. Have I missed >>> anything? >> >> No, you are right. So the extension area has to be used. I think we >> should fail ungetc if the malloc call fails and the extension area is >> unavailable, instead of resorting to heroic bit-stuffing. > > What's wrong with reusing flags2? We have 25 contiguous bits left and at > the rate we've been consuming them here we'll need another 40 years before > we need the last 8. Besides, we've released a couple already and isn't it > internal stuff anyway we can rearrange on a whim? I need to stuff a whole char in there, for which we'll have to put flags2 and the buf into a union { int flags2; char shortbuf[sizeof (int)];} and then take care only to use the bottom char in that buffer. That seems like too much cruft to support legacy uses IMO. Sid
* Siddhesh Poyarekar: > On 2024-11-29 02:02, Florian Weimer wrote: >> * Maciej W. Rozycki: >> >>> The structure is fully packed and there's no padding in the legacy part >>> I'm afraid, and I guess it's the case for the majority of 32-bit targets, >>> which don't imply alignment beyond 4 bytes for pointers. Have I missed >>> anything? >> No, you are right. So the extension area has to be used. I think >> we >> should fail ungetc if the malloc call fails and the extension area is >> unavailable, instead of resorting to heroic bit-stuffing. > > So basically the original struct layout I had proposed plus this in > genops.c should do it? > > diff --git a/libio/genops.c b/libio/genops.c > index d7e35e67d5..5b8bb52ec3 100644 > --- a/libio/genops.c > +++ b/libio/genops.c > @@ -1002,7 +1002,14 @@ _IO_default_pbackfail (FILE *fp, int c) > int backup_size = 128; > char *bbuf = (char *) malloc (backup_size); > if (bbuf == NULL) > - return EOF; > + { > +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1) > + if (__glibc_unlikely (_IO_vtable_offset (fp) != 0)) > + return EOF; > +#endif > + bbuf = fp->_short_backupbuf; > + backup_size = 1; > + } > fp->_IO_save_base = bbuf; > fp->_IO_save_end = fp->_IO_save_base + backup_size; > fp->_IO_backup_base = fp->_IO_save_end; Maybe without the SHLIB_COMPAT, I think the other checks don't have that condition. But yes, I think that's the way to check it. Thanks, Florian
On Fri, 29 Nov 2024, Siddhesh Poyarekar wrote: > > What's wrong with reusing flags2? We have 25 contiguous bits left and at > > the rate we've been consuming them here we'll need another 40 years before > > we need the last 8. Besides, we've released a couple already and isn't it > > internal stuff anyway we can rearrange on a whim? > > I need to stuff a whole char in there, for which we'll have to put flags2 and > the buf into a union { int flags2; char shortbuf[sizeof (int)];} and then take > care only to use the bottom char in that buffer. That seems like too much > cruft to support legacy uses IMO. Couldn't we just redefine flags2 as short and have two extra char members for free? Is there any ABI out there that packs an int differently from a short and a pair of char members provided the struct requires alignment of at least 4 bytes already and there's no padding for the int? Maciej
diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h index d8d26639d1..6b5295fce5 100644 --- a/libio/bits/types/struct_FILE.h +++ b/libio/bits/types/struct_FILE.h @@ -94,8 +94,9 @@ struct _IO_FILE_complete void *_freeres_buf; struct _IO_FILE **_prevchain; int _mode; + char _short_backupbuf[1]; /* Make sure we don't get into trouble again. */ - char _unused2[15 * sizeof (int) - 5 * sizeof (void *)]; + char _unused2[15 * sizeof (int) - 5 * sizeof (void *) - sizeof (char)]; }; /* These macros are used by bits/stdio.h and internal headers. */ diff --git a/libio/fileops.c b/libio/fileops.c index 4db4a76f75..67e57e0d8c 100644 --- a/libio/fileops.c +++ b/libio/fileops.c @@ -478,11 +478,8 @@ _IO_new_file_underflow (FILE *fp) if (fp->_IO_buf_base == NULL) { /* Maybe we already have a push back pointer. */ - if (fp->_IO_save_base != NULL) - { - free (fp->_IO_save_base); - fp->_flags &= ~_IO_IN_BACKUP; - } + if (_IO_have_backup (fp)) + _IO_free_backup_area (fp); _IO_doallocbuf (fp); } @@ -930,11 +927,8 @@ _IO_new_file_seekoff (FILE *fp, off64_t offset, int dir, int mode) if (fp->_IO_buf_base == NULL) { /* It could be that we already have a pushback buffer. */ - if (fp->_IO_read_base != NULL) - { - free (fp->_IO_read_base); - fp->_flags &= ~_IO_IN_BACKUP; - } + if (_IO_have_backup (fp)) + _IO_free_backup_area (fp); _IO_doallocbuf (fp); _IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base); _IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base); @@ -1280,11 +1274,8 @@ _IO_file_xsgetn (FILE *fp, void *data, size_t n) if (fp->_IO_buf_base == NULL) { /* Maybe we already have a push back pointer. */ - if (fp->_IO_save_base != NULL) - { - free (fp->_IO_save_base); - fp->_flags &= ~_IO_IN_BACKUP; - } + if (_IO_have_backup (fp)) + _IO_free_backup_area (fp); _IO_doallocbuf (fp); } diff --git a/libio/genops.c b/libio/genops.c index 6545a78ad5..02b38fbc9a 100644 --- a/libio/genops.c +++ b/libio/genops.c @@ -48,6 +48,13 @@ flush_cleanup (void *not_used) } #endif +static void +free_backup_buf (FILE *fp, char *ptr) +{ + if (fp->_short_backupbuf != ptr) + free (ptr); +} + /* Fields in struct _IO_FILE after the _lock field are internal to glibc and opaque to applications. We can change them as long as the size of struct _IO_FILE is unchanged, which is checked as the @@ -212,7 +219,7 @@ _IO_free_backup_area (FILE *fp) { if (_IO_in_backup (fp)) _IO_switch_to_main_get_area (fp); /* Just in case. */ - free (fp->_IO_save_base); + free_backup_buf (fp, fp->_IO_save_base); fp->_IO_save_base = NULL; fp->_IO_save_end = NULL; fp->_IO_backup_base = NULL; @@ -260,7 +267,7 @@ save_for_backup (FILE *fp, char *end_p) memcpy (new_buffer + avail, fp->_IO_read_base + least_mark, needed_size); - free (fp->_IO_save_base); + free_backup_buf (fp, fp->_IO_save_base); fp->_IO_save_base = new_buffer; fp->_IO_save_end = new_buffer + avail + needed_size; } @@ -634,7 +641,7 @@ _IO_default_finish (FILE *fp, int dummy) for (mark = fp->_markers; mark != NULL; mark = mark->_next) mark->_sbuf = NULL; - if (fp->_IO_save_base) + if (fp->_IO_save_base && fp->_IO_save_base != fp->_short_backupbuf) { free (fp->_IO_save_base); fp->_IO_save_base = NULL; @@ -997,14 +1004,10 @@ _IO_default_pbackfail (FILE *fp, int c) } else if (!_IO_have_backup (fp)) { - /* No backup buffer: allocate one. */ - /* Use nshort buffer, if unused? (probably not) FIXME */ - int backup_size = 128; - char *bbuf = (char *) malloc (backup_size); - if (bbuf == NULL) - return EOF; - fp->_IO_save_base = bbuf; - fp->_IO_save_end = fp->_IO_save_base + backup_size; + /* We need to guarantee one pushback, so start with the built-in + 1-char buffer. */ + fp->_IO_save_base = fp->_short_backupbuf; + fp->_IO_save_end = fp->_IO_save_base + 1; fp->_IO_backup_base = fp->_IO_save_end; } fp->_IO_read_base = fp->_IO_read_ptr; @@ -1022,7 +1025,7 @@ _IO_default_pbackfail (FILE *fp, int c) return EOF; memcpy (new_buf + (new_size - old_size), fp->_IO_read_base, old_size); - free (fp->_IO_read_base); + free_backup_buf (fp, fp->_IO_read_base); _IO_setg (fp, new_buf, new_buf + (new_size - old_size), new_buf + new_size); fp->_IO_backup_base = fp->_IO_read_ptr; diff --git a/stdio-common/Makefile b/stdio-common/Makefile index a166eb7cf8..5e0ec763ff 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -275,6 +275,7 @@ tests := \ tst-tmpnam \ tst-ungetc \ tst-ungetc-leak \ + tst-ungetc-nomem \ tst-unlockedio \ tst-vfprintf-mbs-prec \ tst-vfprintf-user-type \ diff --git a/stdio-common/tst-ungetc-nomem.c b/stdio-common/tst-ungetc-nomem.c new file mode 100644 index 0000000000..bf36229a9c --- /dev/null +++ b/stdio-common/tst-ungetc-nomem.c @@ -0,0 +1,94 @@ +/* Test ungetc behavior with malloc failures. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <string.h> +#include <support/check.h> +#include <support/support.h> +#include <support/temp_file.h> + +extern void *__libc_malloc (size_t) + __attribute__ ((malloc)) __attribute__ ((alloc_size (1))); + +static bool fail = false; + +void * +malloc (size_t sz) +{ + if (fail) + return NULL; + + return __libc_malloc (sz); +} + +static int +do_test (void) +{ + char *filename = NULL; + struct stat props = {}; + size_t bufsz = 0; + + create_temp_file ("tst-ungetc-nomem.", &filename); + if (stat (filename, &props) != 0) + FAIL_EXIT1 ("Could not get file status: %m\n"); + + FILE *fp = fopen (filename, "w"); + + /* The libio buffer sizes are the same as block size. */ + bufsz = props.st_blksize + 2; + + char *buf = xmalloc (bufsz); + memset (buf, 'a', bufsz); + + size_t remaining = bufsz; + while (remaining > 0) + { + size_t done = fwrite (buf, sizeof (char), remaining, fp); + if (done == 0) + break; + remaining -= done; + } + fclose (fp); + + /* Begin test. */ + fp = fopen (filename, "r"); + + + /* The standard requires the first ungetc to always work. */ + fail = true; + TEST_COMPARE (ungetc('y', fp), 'y'); + + /* Now let the buffers get allocated to allow for subsequent tests. */ + fail = false; + TEST_COMPARE (fgetc (fp), 'y'); + TEST_COMPARE (ungetc('y', fp), 'y'); + TEST_COMPARE (fgetc (fp), 'y'); + + while (!feof (fp)) + { + fail = true; + TEST_COMPARE (ungetc('y', fp), 'y'); + fail = false; + TEST_COMPARE (fgetc (fp), 'y'); + if (fgetc (fp) != 'a') + TEST_COMPARE (ferror (fp), 0); + } + return 0; +} + +#include <support/test-driver.c>
The C standard requires that ungetc guarantees at least one pushback, so put a single byte pushback buffer in the FILE struct to enable that. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org> --- libio/bits/types/struct_FILE.h | 3 +- libio/fileops.c | 21 +++----- libio/genops.c | 27 +++++----- stdio-common/Makefile | 1 + stdio-common/tst-ungetc-nomem.c | 94 +++++++++++++++++++++++++++++++++ 5 files changed, 118 insertions(+), 28 deletions(-) create mode 100644 stdio-common/tst-ungetc-nomem.c