diff mbox series

[v6] Use a doubly-linked list for _IO_list_all (bug 27777)

Message ID 20240516125430.1915935-1-hjl.tools@gmail.com
State New
Headers show
Series [v6] Use a doubly-linked list for _IO_list_all (bug 27777) | expand

Commit Message

H.J. Lu May 16, 2024, 12:54 p.m. UTC
From: Alexandre Ferrieux <alexandre.ferrieux@orange.com>

This patch fixes BZ #27777 "fclose does a linear search, takes ages when
many FILE* are opened".  Simply put, the master list of opened (FILE*),
namely _IO_list_all, is a singly-linked list.  As a consequence, the
removal of a single element is in O(N), which cripples the performance
of fclose().  The patch switches to a doubly-linked list, yielding O(1)
removal.  The one padding field in struct _IO_FILE, __pad5, is renamed
to _prevchain for a doubly-linked list.  Since fields in struct _IO_FILE
after the _lock field are internal to glibc and opaque to applications.
We can change them as long as the size of struct _IO_FILE is unchanged,
which is checked as the part of glibc ABI with sizes of _IO_2_1_stdin_,
_IO_2_1_stdout_ and _IO_2_1_stderr_.

NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
will be copied to applications at run-time.  It is used to check if
the _prevchain field can be safely accessed.

After opening 2 million (FILE*), the fclose() of 100 of them takes quite
a few seconds without the patch, and under 2 seconds with it on a loaded
machine.

No test is added since there are no functional changes.

Co-Authored-By: H.J. Lu <hjl.tools@gmail.com>
Signed-off-by: Alexandre Ferrieux <alexandre.ferrieux@orange.com>
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
---
 libio/bits/types/struct_FILE.h |  4 ++--
 libio/genops.c                 | 26 ++++++++++++++++++++++++++
 libio/stdfiles.c               | 15 +++++++++++++++
 3 files changed, 43 insertions(+), 2 deletions(-)

Comments

Carlos O'Donell May 17, 2024, 7:36 p.m. UTC | #1
On 5/16/24 8:54 AM, H.J. Lu wrote:
> From: Alexandre Ferrieux <alexandre.ferrieux@orange.com>

Commit subject is good.

> 
> This patch fixes BZ #27777 "fclose does a linear search, takes ages when
> many FILE* are opened".  Simply put, the master list of opened (FILE*),
> namely _IO_list_all, is a singly-linked list.  As a consequence, the
> removal of a single element is in O(N), which cripples the performance
> of fclose().  The patch switches to a doubly-linked list, yielding O(1)
> removal.  The one padding field in struct _IO_FILE, __pad5, is renamed
> to _prevchain for a doubly-linked list.  Since fields in struct _IO_FILE
> after the _lock field are internal to glibc and opaque to applications.
> We can change them as long as the size of struct _IO_FILE is unchanged,
> which is checked as the part of glibc ABI with sizes of _IO_2_1_stdin_,
> _IO_2_1_stdout_ and _IO_2_1_stderr_.
> 
> NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
> whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
> will be copied to applications at run-time.  It is used to check if
> the _prevchain field can be safely accessed.
> 
> After opening 2 million (FILE*), the fclose() of 100 of them takes quite
> a few seconds without the patch, and under 2 seconds with it on a loaded
> machine.
> 
> No test is added since there are no functional changes.

Micorbenchmark is already in place (and noted comment from Alexandre that we reopen
the same fd every time) and the numbers look great.

I think we should get this in now and start testing the downstream impact if any.

LGTM.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>


> Co-Authored-By: H.J. Lu <hjl.tools@gmail.com>
> Signed-off-by: Alexandre Ferrieux <alexandre.ferrieux@orange.com>
> Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
> ---
>  libio/bits/types/struct_FILE.h |  4 ++--
>  libio/genops.c                 | 26 ++++++++++++++++++++++++++
>  libio/stdfiles.c               | 15 +++++++++++++++
>  3 files changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
> index 7cdaae86f8..d8d26639d1 100644
> --- a/libio/bits/types/struct_FILE.h
> +++ b/libio/bits/types/struct_FILE.h
> @@ -92,10 +92,10 @@ struct _IO_FILE_complete
>    struct _IO_wide_data *_wide_data;
>    struct _IO_FILE *_freeres_list;
>    void *_freeres_buf;
> -  size_t __pad5;
> +  struct _IO_FILE **_prevchain;
>    int _mode;
>    /* Make sure we don't get into trouble again.  */
> -  char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
> +  char _unused2[15 * sizeof (int) - 5 * sizeof (void *)];
>  };
>  
>  /* These macros are used by bits/stdio.h and internal headers.  */
> diff --git a/libio/genops.c b/libio/genops.c
> index bc45e60a09..994ee9c0b1 100644
> --- a/libio/genops.c
> +++ b/libio/genops.c
> @@ -48,6 +48,19 @@ flush_cleanup (void *not_used)
>  }
>  #endif
>  
> +/* Fields in struct _IO_FILE after the _lock field are internal to
> +   glibc and opaque to applications.  We can change them as long as
> +   the size of struct _IO_FILE is unchanged, which is checked as the
> +   part of glibc ABI with sizes of _IO_2_1_stdin_, _IO_2_1_stdout_
> +   and _IO_2_1_stderr_.
> +
> +   NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
> +   whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
> +   will be copied.  */
> +_Static_assert (offsetof (struct _IO_FILE, _prevchain)
> +		> offsetof (struct _IO_FILE, _lock),
> +		"offset of _prevchain > offset of _lock");
> +
>  void
>  _IO_un_link (struct _IO_FILE_plus *fp)
>  {
> @@ -62,6 +75,14 @@ _IO_un_link (struct _IO_FILE_plus *fp)
>  #endif
>        if (_IO_list_all == NULL)
>  	;
> +      else if (_IO_vtable_offset ((FILE *) fp) == 0)
> +	{
> +	  FILE **pr = fp->file._prevchain;
> +	  FILE *nx = fp->file._chain;
> +	  *pr = nx;
> +	  if (nx != NULL)
> +	    nx->_prevchain = pr;
> +	}
>        else if (fp == _IO_list_all)
>  	_IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
>        else
> @@ -95,6 +116,11 @@ _IO_link_in (struct _IO_FILE_plus *fp)
>        _IO_flockfile ((FILE *) fp);
>  #endif
>        fp->file._chain = (FILE *) _IO_list_all;
> +      if (_IO_vtable_offset ((FILE *) fp) == 0)
> +	{
> +	  fp->file._prevchain = (FILE **) &_IO_list_all;
> +	  _IO_list_all->file._prevchain = &fp->file._chain;
> +	}
>        _IO_list_all = fp;
>  #ifdef _IO_MTSAFE_IO
>        _IO_funlockfile ((FILE *) fp);
> diff --git a/libio/stdfiles.c b/libio/stdfiles.c
> index cd8eca8bf3..12db1645ef 100644
> --- a/libio/stdfiles.c
> +++ b/libio/stdfiles.c
> @@ -54,4 +54,19 @@ DEF_STDFILE(_IO_2_1_stdout_, 1, &_IO_2_1_stdin_, _IO_NO_READS);
>  DEF_STDFILE(_IO_2_1_stderr_, 2, &_IO_2_1_stdout_, _IO_NO_READS+_IO_UNBUFFERED);
>  
>  struct _IO_FILE_plus *_IO_list_all = &_IO_2_1_stderr_;
> +
> +/* Finish double-linking stdin, stdout, and stderr in a constructor.
> +   Static initialization cannot complete the _prevchain setup.  */
> +
> +__THROW __attribute__ ((constructor))
> +static void
> +_IO_stdfiles_init (void)
> +{
> +  struct _IO_FILE **f;
> +  for (f = (struct _IO_FILE **) &_IO_list_all;
> +       *f != NULL;
> +       f = &(*f)->_chain)
> +    (*f)->_prevchain = f;
> +}
> +
>  libc_hidden_data_def (_IO_list_all)
Cristian Rodríguez May 19, 2024, 6:02 p.m. UTC | #2
On Fri, May 17, 2024 at 3:36 PM Carlos O'Donell <carlos@redhat.com> wrote:

> I think we should get this in now and start testing the downstream impact if any.
>
> LGTM.
>
> Reviewed-by: Carlos O'Donell <carlos@redhat.com>

Nothing blew up here..it appears to be ok.
Andreas Schwab July 8, 2024, 1:17 p.m. UTC | #3
On Mai 16 2024, H.J. Lu wrote:

> diff --git a/libio/genops.c b/libio/genops.c
> index bc45e60a09..994ee9c0b1 100644
> --- a/libio/genops.c
> +++ b/libio/genops.c
> @@ -48,6 +48,19 @@ flush_cleanup (void *not_used)
>  }
>  #endif
>  
> +/* Fields in struct _IO_FILE after the _lock field are internal to
> +   glibc and opaque to applications.  We can change them as long as
> +   the size of struct _IO_FILE is unchanged, which is checked as the
> +   part of glibc ABI with sizes of _IO_2_1_stdin_, _IO_2_1_stdout_
> +   and _IO_2_1_stderr_.
> +
> +   NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
> +   whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
> +   will be copied.  */
> +_Static_assert (offsetof (struct _IO_FILE, _prevchain)
> +		> offsetof (struct _IO_FILE, _lock),
> +		"offset of _prevchain > offset of _lock");
> +
>  void
>  _IO_un_link (struct _IO_FILE_plus *fp)
>  {
> @@ -62,6 +75,14 @@ _IO_un_link (struct _IO_FILE_plus *fp)
>  #endif
>        if (_IO_list_all == NULL)
>  	;
> +      else if (_IO_vtable_offset ((FILE *) fp) == 0)
> +	{
> +	  FILE **pr = fp->file._prevchain;
> +	  FILE *nx = fp->file._chain;
> +	  *pr = nx;
> +	  if (nx != NULL)
> +	    nx->_prevchain = pr;
> +	}
>        else if (fp == _IO_list_all)
>  	_IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
>        else
> @@ -95,6 +116,11 @@ _IO_link_in (struct _IO_FILE_plus *fp)
>        _IO_flockfile ((FILE *) fp);
>  #endif
>        fp->file._chain = (FILE *) _IO_list_all;
> +      if (_IO_vtable_offset ((FILE *) fp) == 0)
> +	{
> +	  fp->file._prevchain = (FILE **) &_IO_list_all;
> +	  _IO_list_all->file._prevchain = &fp->file._chain;

_IO_list_all can be NULL if all streams are closed.  This is what
gnulib's close_stdin does.  If the program is compiled for profiling
there will be a finalizer that opens a file to write the profiling data.
diff mbox series

Patch

diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
index 7cdaae86f8..d8d26639d1 100644
--- a/libio/bits/types/struct_FILE.h
+++ b/libio/bits/types/struct_FILE.h
@@ -92,10 +92,10 @@  struct _IO_FILE_complete
   struct _IO_wide_data *_wide_data;
   struct _IO_FILE *_freeres_list;
   void *_freeres_buf;
-  size_t __pad5;
+  struct _IO_FILE **_prevchain;
   int _mode;
   /* Make sure we don't get into trouble again.  */
-  char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
+  char _unused2[15 * sizeof (int) - 5 * sizeof (void *)];
 };
 
 /* These macros are used by bits/stdio.h and internal headers.  */
diff --git a/libio/genops.c b/libio/genops.c
index bc45e60a09..994ee9c0b1 100644
--- a/libio/genops.c
+++ b/libio/genops.c
@@ -48,6 +48,19 @@  flush_cleanup (void *not_used)
 }
 #endif
 
+/* Fields in struct _IO_FILE after the _lock field are internal to
+   glibc and opaque to applications.  We can change them as long as
+   the size of struct _IO_FILE is unchanged, which is checked as the
+   part of glibc ABI with sizes of _IO_2_1_stdin_, _IO_2_1_stdout_
+   and _IO_2_1_stderr_.
+
+   NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
+   whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
+   will be copied.  */
+_Static_assert (offsetof (struct _IO_FILE, _prevchain)
+		> offsetof (struct _IO_FILE, _lock),
+		"offset of _prevchain > offset of _lock");
+
 void
 _IO_un_link (struct _IO_FILE_plus *fp)
 {
@@ -62,6 +75,14 @@  _IO_un_link (struct _IO_FILE_plus *fp)
 #endif
       if (_IO_list_all == NULL)
 	;
+      else if (_IO_vtable_offset ((FILE *) fp) == 0)
+	{
+	  FILE **pr = fp->file._prevchain;
+	  FILE *nx = fp->file._chain;
+	  *pr = nx;
+	  if (nx != NULL)
+	    nx->_prevchain = pr;
+	}
       else if (fp == _IO_list_all)
 	_IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
       else
@@ -95,6 +116,11 @@  _IO_link_in (struct _IO_FILE_plus *fp)
       _IO_flockfile ((FILE *) fp);
 #endif
       fp->file._chain = (FILE *) _IO_list_all;
+      if (_IO_vtable_offset ((FILE *) fp) == 0)
+	{
+	  fp->file._prevchain = (FILE **) &_IO_list_all;
+	  _IO_list_all->file._prevchain = &fp->file._chain;
+	}
       _IO_list_all = fp;
 #ifdef _IO_MTSAFE_IO
       _IO_funlockfile ((FILE *) fp);
diff --git a/libio/stdfiles.c b/libio/stdfiles.c
index cd8eca8bf3..12db1645ef 100644
--- a/libio/stdfiles.c
+++ b/libio/stdfiles.c
@@ -54,4 +54,19 @@  DEF_STDFILE(_IO_2_1_stdout_, 1, &_IO_2_1_stdin_, _IO_NO_READS);
 DEF_STDFILE(_IO_2_1_stderr_, 2, &_IO_2_1_stdout_, _IO_NO_READS+_IO_UNBUFFERED);
 
 struct _IO_FILE_plus *_IO_list_all = &_IO_2_1_stderr_;
+
+/* Finish double-linking stdin, stdout, and stderr in a constructor.
+   Static initialization cannot complete the _prevchain setup.  */
+
+__THROW __attribute__ ((constructor))
+static void
+_IO_stdfiles_init (void)
+{
+  struct _IO_FILE **f;
+  for (f = (struct _IO_FILE **) &_IO_list_all;
+       *f != NULL;
+       f = &(*f)->_chain)
+    (*f)->_prevchain = f;
+}
+
 libc_hidden_data_def (_IO_list_all)