diff mbox series

[v3] Fix #27777 - now use a doubly-linked list for _IO_list_all

Message ID 20240506135238.1344580-1-hjl.tools@gmail.com
State New
Headers show
Series [v3] Fix #27777 - now use a doubly-linked list for _IO_list_all | expand

Commit Message

H.J. Lu May 6, 2024, 1:52 p.m. UTC
From: Alexandre Ferrieux <alexandre.ferrieux@orange.com>

This patch fixes BZ #27777 "fclose does a linear search, takes ages when
many FILE* are opened".  Simply put, the master list of opened (FILE*),
namely _IO_list_all, is a singly-linked list.  As a consequence, the
removal of a single element is in O(N), which cripples the performance
of fclose().  The patch switches to a doubly-linked list, yielding O(1)
removal.  The one padding field in struct _IO_FILE, __pad5, is renamed
to _prevchain for a doubly-linked list.  Since fields in struct _IO_FILE
after the _lock field are internal to glibc and opaque to applications.
We can change them as long as the size of struct _IO_FILE is unchanged,
which is checked as the part of glibc ABI with sizes of _IO_2_1_stdin_,
_IO_2_1_stdout_ and _IO_2_1_stderr_.

NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
will be copied to applications at run-time.  It is used to check if
the _prevchain field can be safely accessed.

As tst-fclose.c shows, after opening 2 million (FILE*), the fclose() of
100 of them takes quite a few seconds without the patch, and under 2
seconds with it using "make check -jN" on a loaded machine.

Co-Authored-By: H.J. Lu <hjl.tools@gmail.com>
---
 libio/Makefile                 |  1 +
 libio/bits/types/struct_FILE.h |  4 +--
 libio/genops.c                 | 26 ++++++++++++++++
 libio/stdfiles.c               | 15 +++++++++
 libio/tst-fclose.c             | 56 ++++++++++++++++++++++++++++++++++
 5 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 libio/tst-fclose.c

Comments

H.J. Lu May 13, 2024, 12:43 p.m. UTC | #1
On Mon, May 6, 2024 at 6:52 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> From: Alexandre Ferrieux <alexandre.ferrieux@orange.com>
>
> This patch fixes BZ #27777 "fclose does a linear search, takes ages when
> many FILE* are opened".  Simply put, the master list of opened (FILE*),
> namely _IO_list_all, is a singly-linked list.  As a consequence, the
> removal of a single element is in O(N), which cripples the performance
> of fclose().  The patch switches to a doubly-linked list, yielding O(1)
> removal.  The one padding field in struct _IO_FILE, __pad5, is renamed
> to _prevchain for a doubly-linked list.  Since fields in struct _IO_FILE
> after the _lock field are internal to glibc and opaque to applications.
> We can change them as long as the size of struct _IO_FILE is unchanged,
> which is checked as the part of glibc ABI with sizes of _IO_2_1_stdin_,
> _IO_2_1_stdout_ and _IO_2_1_stderr_.
>
> NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
> whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
> will be copied to applications at run-time.  It is used to check if
> the _prevchain field can be safely accessed.
>
> As tst-fclose.c shows, after opening 2 million (FILE*), the fclose() of
> 100 of them takes quite a few seconds without the patch, and under 2
> seconds with it using "make check -jN" on a loaded machine.
>
> Co-Authored-By: H.J. Lu <hjl.tools@gmail.com>
> ---
>  libio/Makefile                 |  1 +
>  libio/bits/types/struct_FILE.h |  4 +--
>  libio/genops.c                 | 26 ++++++++++++++++
>  libio/stdfiles.c               | 15 +++++++++
>  libio/tst-fclose.c             | 56 ++++++++++++++++++++++++++++++++++
>  5 files changed, 100 insertions(+), 2 deletions(-)
>  create mode 100644 libio/tst-fclose.c
>
> diff --git a/libio/Makefile b/libio/Makefile
> index 0c1f16ee3b..7f598c840c 100644
> --- a/libio/Makefile
> +++ b/libio/Makefile
> @@ -94,6 +94,7 @@ tests = \
>    tst-eof \
>    tst-ext \
>    tst-ext2 \
> +  tst-fclose \
>    tst-fgetc-after-eof \
>    tst-fgetwc \
>    tst-fgetws \
> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
> index 7cdaae86f8..d8d26639d1 100644
> --- a/libio/bits/types/struct_FILE.h
> +++ b/libio/bits/types/struct_FILE.h
> @@ -92,10 +92,10 @@ struct _IO_FILE_complete
>    struct _IO_wide_data *_wide_data;
>    struct _IO_FILE *_freeres_list;
>    void *_freeres_buf;
> -  size_t __pad5;
> +  struct _IO_FILE **_prevchain;
>    int _mode;
>    /* Make sure we don't get into trouble again.  */
> -  char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
> +  char _unused2[15 * sizeof (int) - 5 * sizeof (void *)];
>  };
>
>  /* These macros are used by bits/stdio.h and internal headers.  */
> diff --git a/libio/genops.c b/libio/genops.c
> index bc45e60a09..994ee9c0b1 100644
> --- a/libio/genops.c
> +++ b/libio/genops.c
> @@ -48,6 +48,19 @@ flush_cleanup (void *not_used)
>  }
>  #endif
>
> +/* Fields in struct _IO_FILE after the _lock field are internal to
> +   glibc and opaque to applications.  We can change them as long as
> +   the size of struct _IO_FILE is unchanged, which is checked as the
> +   part of glibc ABI with sizes of _IO_2_1_stdin_, _IO_2_1_stdout_
> +   and _IO_2_1_stderr_.
> +
> +   NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
> +   whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
> +   will be copied.  */
> +_Static_assert (offsetof (struct _IO_FILE, _prevchain)
> +               > offsetof (struct _IO_FILE, _lock),
> +               "offset of _prevchain > offset of _lock");
> +
>  void
>  _IO_un_link (struct _IO_FILE_plus *fp)
>  {
> @@ -62,6 +75,14 @@ _IO_un_link (struct _IO_FILE_plus *fp)
>  #endif
>        if (_IO_list_all == NULL)
>         ;
> +      else if (_IO_vtable_offset ((FILE *) fp) == 0)
> +       {
> +         FILE **pr = fp->file._prevchain;
> +         FILE *nx = fp->file._chain;
> +         *pr = nx;
> +         if (nx != NULL)
> +           nx->_prevchain = pr;
> +       }
>        else if (fp == _IO_list_all)
>         _IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
>        else
> @@ -95,6 +116,11 @@ _IO_link_in (struct _IO_FILE_plus *fp)
>        _IO_flockfile ((FILE *) fp);
>  #endif
>        fp->file._chain = (FILE *) _IO_list_all;
> +      if (_IO_vtable_offset ((FILE *) fp) == 0)
> +       {
> +         fp->file._prevchain = (FILE **) &_IO_list_all;
> +         _IO_list_all->file._prevchain = &fp->file._chain;
> +       }
>        _IO_list_all = fp;
>  #ifdef _IO_MTSAFE_IO
>        _IO_funlockfile ((FILE *) fp);
> diff --git a/libio/stdfiles.c b/libio/stdfiles.c
> index cd8eca8bf3..d607fa02e0 100644
> --- a/libio/stdfiles.c
> +++ b/libio/stdfiles.c
> @@ -54,4 +54,19 @@ DEF_STDFILE(_IO_2_1_stdout_, 1, &_IO_2_1_stdin_, _IO_NO_READS);
>  DEF_STDFILE(_IO_2_1_stderr_, 2, &_IO_2_1_stdout_, _IO_NO_READS+_IO_UNBUFFERED);
>
>  struct _IO_FILE_plus *_IO_list_all = &_IO_2_1_stderr_;
> +
> +/* Finish the double-linking for stdfiles as static initialization
> +   cannot.  */
> +
> +__THROW __attribute__ ((constructor))
> +static void
> +_IO_stdfiles_init (void)
> +{
> +  struct _IO_FILE **f;
> +  for (f = (struct _IO_FILE **) &_IO_list_all;
> +       *f != NULL;
> +       f = &(*f)->_chain)
> +    (*f)->_prevchain = f;
> +}
> +
>  libc_hidden_data_def (_IO_list_all)
> diff --git a/libio/tst-fclose.c b/libio/tst-fclose.c
> new file mode 100644
> index 0000000000..bc0c216e29
> --- /dev/null
> +++ b/libio/tst-fclose.c
> @@ -0,0 +1,56 @@
> +/* Verify fclose performance with many opened files.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <support/test-driver.h>
> +
> +#define N 2000000
> +#define M 100
> +
> +static int
> +do_test (void)
> +{
> +  FILE *ff, *keep[M];
> +  int i;
> +
> +  fprintf (stderr, "Preparing %d FILEs ...\n", N);
> +  fflush (stderr);
> +  for (i = 0; i < N; i++)
> +    {
> +      ff = fdopen (STDIN_FILENO, "r");
> +      if (!ff)
> +       {
> +         fprintf (stderr, "### failed to fdopen: %m\n");
> +         return EXIT_FAILURE;
> +       }
> +      if (i < M)
> +       keep[i] = ff;
> +    }
> +  fprintf (stderr, "Now fclosing %d FILEs ...", M);
> +  fflush (stderr);
> +  for (i = 0; i < M; i++)
> +    fclose (keep[i]);
> +  fprintf (stderr, "DONE\n");
> +  fflush (stderr);
> +  return EXIT_SUCCESS;
> +}
> +
> +#define TIMEOUT 2
> +#include <support/test-driver.c>
> --
> 2.45.0
>

PING
Andreas Schwab May 13, 2024, 1:04 p.m. UTC | #2
On Mai 06 2024, H.J. Lu wrote:

> diff --git a/libio/tst-fclose.c b/libio/tst-fclose.c
> new file mode 100644
> index 0000000000..bc0c216e29
> --- /dev/null
> +++ b/libio/tst-fclose.c
> @@ -0,0 +1,56 @@
> +/* Verify fclose performance with many opened files.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <support/test-driver.h>
> +
> +#define N 2000000
> +#define M 100
> +
> +static int
> +do_test (void)
> +{
> +  FILE *ff, *keep[M];
> +  int i;
> +
> +  fprintf (stderr, "Preparing %d FILEs ...\n", N);
> +  fflush (stderr);
> +  for (i = 0; i < N; i++)
> +    {
> +      ff = fdopen (STDIN_FILENO, "r");
> +      if (!ff)
> +	{
> +	  fprintf (stderr, "### failed to fdopen: %m\n");
> +	  return EXIT_FAILURE;
> +	}
> +      if (i < M)
> +	keep[i] = ff;
> +    }
> +  fprintf (stderr, "Now fclosing %d FILEs ...", M);
> +  fflush (stderr);
> +  for (i = 0; i < M; i++)
> +    fclose (keep[i]);
> +  fprintf (stderr, "DONE\n");
> +  fflush (stderr);
> +  return EXIT_SUCCESS;
> +}
> +
> +#define TIMEOUT 2
> +#include <support/test-driver.c>

Why do you override TIMEOUT?
H.J. Lu May 13, 2024, 1:08 p.m. UTC | #3
On Mon, May 13, 2024 at 6:04 AM Andreas Schwab <schwab@suse.de> wrote:
>
> On Mai 06 2024, H.J. Lu wrote:
>
> > diff --git a/libio/tst-fclose.c b/libio/tst-fclose.c
> > new file mode 100644
> > index 0000000000..bc0c216e29
> > --- /dev/null
> > +++ b/libio/tst-fclose.c
> > @@ -0,0 +1,56 @@
> > +/* Verify fclose performance with many opened files.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <unistd.h>
> > +#include <support/test-driver.h>
> > +
> > +#define N 2000000
> > +#define M 100
> > +
> > +static int
> > +do_test (void)
> > +{
> > +  FILE *ff, *keep[M];
> > +  int i;
> > +
> > +  fprintf (stderr, "Preparing %d FILEs ...\n", N);
> > +  fflush (stderr);
> > +  for (i = 0; i < N; i++)
> > +    {
> > +      ff = fdopen (STDIN_FILENO, "r");
> > +      if (!ff)
> > +     {
> > +       fprintf (stderr, "### failed to fdopen: %m\n");
> > +       return EXIT_FAILURE;
> > +     }
> > +      if (i < M)
> > +     keep[i] = ff;
> > +    }
> > +  fprintf (stderr, "Now fclosing %d FILEs ...", M);
> > +  fflush (stderr);
> > +  for (i = 0; i < M; i++)
> > +    fclose (keep[i]);
> > +  fprintf (stderr, "DONE\n");
> > +  fflush (stderr);
> > +  return EXIT_SUCCESS;
> > +}
> > +
> > +#define TIMEOUT 2
> > +#include <support/test-driver.c>
>
> Why do you override TIMEOUT?

This is to ensure that the test will timeout if the patch isn't
applied.  The test may pass with the original TIMEOUT
without the patch.
Andreas Schwab May 13, 2024, 1:22 p.m. UTC | #4
On Mai 13 2024, H.J. Lu wrote:

> This is to ensure that the test will timeout if the patch isn't
> applied.

That won't work.
H.J. Lu May 13, 2024, 1:51 p.m. UTC | #5
On Mon, May 13, 2024 at 6:22 AM Andreas Schwab <schwab@suse.de> wrote:
>
> On Mai 13 2024, H.J. Lu wrote:
>
> > This is to ensure that the test will timeout if the patch isn't
> > applied.
>
> That won't work.
>

Test is dropped in the V5 patch.
diff mbox series

Patch

diff --git a/libio/Makefile b/libio/Makefile
index 0c1f16ee3b..7f598c840c 100644
--- a/libio/Makefile
+++ b/libio/Makefile
@@ -94,6 +94,7 @@  tests = \
   tst-eof \
   tst-ext \
   tst-ext2 \
+  tst-fclose \
   tst-fgetc-after-eof \
   tst-fgetwc \
   tst-fgetws \
diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
index 7cdaae86f8..d8d26639d1 100644
--- a/libio/bits/types/struct_FILE.h
+++ b/libio/bits/types/struct_FILE.h
@@ -92,10 +92,10 @@  struct _IO_FILE_complete
   struct _IO_wide_data *_wide_data;
   struct _IO_FILE *_freeres_list;
   void *_freeres_buf;
-  size_t __pad5;
+  struct _IO_FILE **_prevchain;
   int _mode;
   /* Make sure we don't get into trouble again.  */
-  char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
+  char _unused2[15 * sizeof (int) - 5 * sizeof (void *)];
 };
 
 /* These macros are used by bits/stdio.h and internal headers.  */
diff --git a/libio/genops.c b/libio/genops.c
index bc45e60a09..994ee9c0b1 100644
--- a/libio/genops.c
+++ b/libio/genops.c
@@ -48,6 +48,19 @@  flush_cleanup (void *not_used)
 }
 #endif
 
+/* Fields in struct _IO_FILE after the _lock field are internal to
+   glibc and opaque to applications.  We can change them as long as
+   the size of struct _IO_FILE is unchanged, which is checked as the
+   part of glibc ABI with sizes of _IO_2_1_stdin_, _IO_2_1_stdout_
+   and _IO_2_1_stderr_.
+
+   NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
+   whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
+   will be copied.  */
+_Static_assert (offsetof (struct _IO_FILE, _prevchain)
+		> offsetof (struct _IO_FILE, _lock),
+		"offset of _prevchain > offset of _lock");
+
 void
 _IO_un_link (struct _IO_FILE_plus *fp)
 {
@@ -62,6 +75,14 @@  _IO_un_link (struct _IO_FILE_plus *fp)
 #endif
       if (_IO_list_all == NULL)
 	;
+      else if (_IO_vtable_offset ((FILE *) fp) == 0)
+	{
+	  FILE **pr = fp->file._prevchain;
+	  FILE *nx = fp->file._chain;
+	  *pr = nx;
+	  if (nx != NULL)
+	    nx->_prevchain = pr;
+	}
       else if (fp == _IO_list_all)
 	_IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
       else
@@ -95,6 +116,11 @@  _IO_link_in (struct _IO_FILE_plus *fp)
       _IO_flockfile ((FILE *) fp);
 #endif
       fp->file._chain = (FILE *) _IO_list_all;
+      if (_IO_vtable_offset ((FILE *) fp) == 0)
+	{
+	  fp->file._prevchain = (FILE **) &_IO_list_all;
+	  _IO_list_all->file._prevchain = &fp->file._chain;
+	}
       _IO_list_all = fp;
 #ifdef _IO_MTSAFE_IO
       _IO_funlockfile ((FILE *) fp);
diff --git a/libio/stdfiles.c b/libio/stdfiles.c
index cd8eca8bf3..d607fa02e0 100644
--- a/libio/stdfiles.c
+++ b/libio/stdfiles.c
@@ -54,4 +54,19 @@  DEF_STDFILE(_IO_2_1_stdout_, 1, &_IO_2_1_stdin_, _IO_NO_READS);
 DEF_STDFILE(_IO_2_1_stderr_, 2, &_IO_2_1_stdout_, _IO_NO_READS+_IO_UNBUFFERED);
 
 struct _IO_FILE_plus *_IO_list_all = &_IO_2_1_stderr_;
+
+/* Finish the double-linking for stdfiles as static initialization
+   cannot.  */
+
+__THROW __attribute__ ((constructor))
+static void
+_IO_stdfiles_init (void)
+{
+  struct _IO_FILE **f;
+  for (f = (struct _IO_FILE **) &_IO_list_all;
+       *f != NULL;
+       f = &(*f)->_chain)
+    (*f)->_prevchain = f;
+}
+
 libc_hidden_data_def (_IO_list_all)
diff --git a/libio/tst-fclose.c b/libio/tst-fclose.c
new file mode 100644
index 0000000000..bc0c216e29
--- /dev/null
+++ b/libio/tst-fclose.c
@@ -0,0 +1,56 @@ 
+/* Verify fclose performance with many opened files.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <support/test-driver.h>
+
+#define N 2000000
+#define M 100
+
+static int
+do_test (void)
+{
+  FILE *ff, *keep[M];
+  int i;
+
+  fprintf (stderr, "Preparing %d FILEs ...\n", N);
+  fflush (stderr);
+  for (i = 0; i < N; i++)
+    {
+      ff = fdopen (STDIN_FILENO, "r");
+      if (!ff)
+	{
+	  fprintf (stderr, "### failed to fdopen: %m\n");
+	  return EXIT_FAILURE;
+	}
+      if (i < M)
+	keep[i] = ff;
+    }
+  fprintf (stderr, "Now fclosing %d FILEs ...", M);
+  fflush (stderr);
+  for (i = 0; i < M; i++)
+    fclose (keep[i]);
+  fprintf (stderr, "DONE\n");
+  fflush (stderr);
+  return EXIT_SUCCESS;
+}
+
+#define TIMEOUT 2
+#include <support/test-driver.c>