diff mbox series

[1/3] gcov: Cache source files

Message ID 20240808072502.3251665-1-j@lambda.is
State New
Headers show
Series [1/3] gcov: Cache source files | expand

Commit Message

Jørgen Kvalsvik Aug. 8, 2024, 7:25 a.m. UTC
Cache the source files as they are read, rather than discarding them at
the end of output_lines (), and move the reading of the source file to
the new function slurp.

This patch does not really change anything other than moving the file
reading out of output_lines (), but it sets gcov up for more interaction
with the source file. The motivating example is reporting coverage on
functions from different source files, notably C++ headers and
((always_inline)).

Here is an example of what gcov does today:

hello.h:
inline __attribute__((always_inline))
int hello (const char *s)
{
  if (s)
    printf ("hello, %s!\n", s);
  else
    printf ("hello, world!\n");
  return 0;
}

hello.c:
int notmain(const char *entity)
{
  return hello (entity);
}

int main()
{
  const char *empty = 0;
  if (!empty)
    hello (empty);
  else
    puts ("Goodbye!");
}

$ gcov -abc hello
function notmain called 0 returned 0% blocks executed 0%
    #####:    4:int notmain(const char *entity)
    %%%%%:    4-block 2
branch  0 never executed (fallthrough)
branch  1 never executed
        -:    5:{
    #####:    6:  return hello (entity);
    %%%%%:    6-block 7
        -:    7:}

Clearly there is a branch in notmain, but the branch comes from the
inlining of hello. This is not very obvious from looking at the output.
Here is hello.h.gcov:

        -:    3:inline __attribute__((always_inline))
        -:    4:int hello (const char *s)
        -:    5:{
    #####:    6:  if (s)
    %%%%%:    6-block 3
branch  0 never executed (fallthrough)
branch  1 never executed
    %%%%%:    6-block 2
branch  2 never executed (fallthrough)
branch  3 never executed
    #####:    7:    printf ("hello, %s!\n", s);
    %%%%%:    7-block 4
call    0 never executed
    %%%%%:    7-block 3
call    1 never executed
        -:    8:  else
    #####:    9:    printf ("hello, world!\n");
    %%%%%:    9-block 5
call    0 never executed
    %%%%%:    9-block 4
call    1 never executed
    #####:   10:  return 0;
    %%%%%:   10-block 6
    %%%%%:   10-block 5
        -:   11:}

The blocks from the different call sites have all been interleaved.

The reporting could be tuned to list the inlined function, too, like
this:

        1:    4:int notmain(const char *entity)
        -: == inlined from hello.h ==
        1:    6:  if (s)
branch  0 taken 0 (fallthrough)
branch  1 taken 1
    #####:    7:    printf ("hello, %s!\n", s);
    %%%%%:    7-block 3
call    0 never executed
        -:    8:  else
        1:    9:    printf ("hello, world!\n");
        1:    9-block 4
call    0 returned 1
        1:   10:  return 0;
        1:   10-block 5
        -: == inlined from hello.h (end) ==
        -:    5:{
        1:    6:  return hello (entity);
        1:    6-block 7
        -:    7:}

Implementing something to this effect relies on having the sources for
both files (hello.c, hello.h) available, which is what this patch sets
up.

Note that the previous reading code would leak the source file content,
so explicitly storing it is neither a huge departure nor a significant
performance cost. I verified this with valgrind:

With slurp:

$ valgrind gcov ./hello
== == Memcheck, a memory error detector
== == Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
== == Using Valgrind-3.19.0 and LibVEX; rerun with -h for copyright info
== == Command: ./gcc/gcov demo
== ==
File 'hello.c'
Lines executed:100.00% of 4
Creating 'hello.c.gcov'

File 'hello.h'
Lines executed:75.00% of 4
Creating 'hello.h.gcov'
== ==
== == HEAP SUMMARY:
== ==     in use at exit: 84,907 bytes in 54 blocks
== ==   total heap usage: 254 allocs, 200 frees, 137,156 bytes allocated
== ==
== == LEAK SUMMARY:
== ==    definitely lost: 1,237 bytes in 22 blocks
== ==    indirectly lost: 562 bytes in 18 blocks
== ==      possibly lost: 0 bytes in 0 blocks
== ==    still reachable: 83,108 bytes in 14 blocks
== ==                       of which reachable via heuristic:
== ==                         newarray           : 1,544 bytes in 1 blocks
== ==         suppressed: 0 bytes in 0 blocks
== == Rerun with --leak-check=full to see details of leaked memory
== ==
== == For lists of detected and suppressed errors, rerun with: -s
== == ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

Without slurp:

$ valgrind gcov ./demo
== == Memcheck, a memory error detector
== == Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
== == Using Valgrind-3.19.0 and LibVEX; rerun with -h for copyright info
== == Command: ./gcc/gcov demo
== ==
File 'hello.c'
Lines executed:100.00% of 4
Creating 'hello.c.gcov'

File 'hello.h'
Lines executed:75.00% of 4
Creating 'hello.h.gcov'

Lines executed:87.50% of 8
== ==
== == HEAP SUMMARY:
== ==     in use at exit: 85,316 bytes in 82 blocks
== ==   total heap usage: 250 allocs, 168 frees, 137,084 bytes allocated
== ==
== == LEAK SUMMARY:
== ==    definitely lost: 1,646 bytes in 50 blocks
== ==    indirectly lost: 562 bytes in 18 blocks
== ==      possibly lost: 0 bytes in 0 blocks
== ==    still reachable: 83,108 bytes in 14 blocks
== ==                       of which reachable via heuristic:
== ==                         newarray           : 1,544 bytes in 1 blocks
== ==         suppressed: 0 bytes in 0 blocks
== == Rerun with --leak-check=full to see details of leaked memory
== ==
== == For lists of detected and suppressed errors, rerun with: -s
== == ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

gcc/ChangeLog:

	* gcov.cc (release_structures): Release source_lines.
	(slurp): New function.
	(output_lines): Read sources with slurp.
---
 gcc/gcov.cc | 70 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 53 insertions(+), 17 deletions(-)
diff mbox series

Patch

diff --git a/gcc/gcov.cc b/gcc/gcov.cc
index e76a314041c..5eb40f94b99 100644
--- a/gcc/gcov.cc
+++ b/gcc/gcov.cc
@@ -550,6 +550,11 @@  static vector<name_map> names;
    a file being read multiple times.  */
 static vector<char *> processed_files;
 
+/* The contents of a source file.  The nth SOURCE_LINES entry is the
+   contents of the nth SOURCES, or null/empty if it has not been or
+   could not be read.  */
+static vector<vector<const char *>*> source_lines;
+
 /* This holds data summary information.  */
 
 static unsigned object_runs;
@@ -762,6 +767,8 @@  static string make_gcov_file_name (const char *, const char *);
 static char *mangle_name (const char *);
 static void release_structures (void);
 extern int main (int, char **);
+static const vector<const char *>&
+slurp (const source_info &src, FILE *gcov_file, const char *line_start);
 
 function_info::function_info (): m_name (NULL), m_demangled_name (NULL),
   ident (0), lineno_checksum (0), cfg_checksum (0), has_catch (0),
@@ -1804,6 +1811,15 @@  release_structures (void)
        it != functions.end (); it++)
     delete (*it);
 
+  for (vector<const char *> *lines : source_lines)
+    {
+      if (lines)
+	for (const char *line : *lines)
+	  free (const_cast <char*> (line));
+      delete (lines);
+    }
+  source_lines.resize (0);
+
   for (fnfilter &filter : filters)
     regfree (&filter.regex);
 
@@ -3246,6 +3262,41 @@  read_line (FILE *file)
   return pos ? string : NULL;
 }
 
+/* Get the lines of SRC, possibly from a cache.  A failed open is reported
+   on stderr; if the file opens but SRC.file_time is 0, "Source is newer
+   than graph" prefixed with LINE_START is written to GCOV_FILE.  */
+static const vector<const char *>&
+slurp (const source_info &src, FILE *gcov_file,
+       const char *line_start)
+{
+  if (source_lines.size () <= src.index)
+    source_lines.resize (src.index + 1);
+
+  /* Store vector pointers so that the returned references remain
+     stable and won't be broken by successive calls to slurp.  */
+  if (!source_lines[src.index])
+    source_lines[src.index] = new vector<const char *> ();
+
+  if (!source_lines[src.index]->empty ())
+    return *source_lines[src.index];
+
+  FILE *source_file = fopen (src.name, "r");
+  if (!source_file)
+    fnotice (stderr, "Cannot open source file %s\n", src.name);
+  else if (src.file_time == 0)
+    fprintf (gcov_file, "%sSource is newer than graph\n", line_start);
+
+  const char *retval;
+  vector<const char *> &lines = *source_lines[src.index];
+  if (source_file)
+    while ((retval = read_line (source_file)))
+      lines.push_back (xstrdup (retval));
+
+  if (source_file)
+    fclose (source_file);
+  return lines;
+}
+
 /* Pad string S with spaces from left to have total width equal to 9.  */
 
 static void
@@ -3435,9 +3486,6 @@  output_lines (FILE *gcov_file, const source_info *src)
 #define  DEFAULT_LINE_START "        -:    0:"
 #define FN_SEPARATOR "------------------\n"
 
-  FILE *source_file;
-  const char *retval;
-
   /* Print colorization legend.  */
   if (flag_use_colors)
     fprintf (gcov_file, "%s",
@@ -3464,17 +3512,8 @@  output_lines (FILE *gcov_file, const source_info *src)
       fprintf (gcov_file, DEFAULT_LINE_START "Runs:%u\n", object_runs);
     }
 
-  source_file = fopen (src->name, "r");
-  if (!source_file)
-    fnotice (stderr, "Cannot open source file %s\n", src->name);
-  else if (src->file_time == 0)
-    fprintf (gcov_file, DEFAULT_LINE_START "Source is newer than graph\n");
-
-  vector<const char *> source_lines;
-  if (source_file)
-    while ((retval = read_line (source_file)) != NULL)
-      source_lines.push_back (xstrdup (retval));
-
+  const vector<const char *> &source_lines = slurp (*src, gcov_file,
+						    DEFAULT_LINE_START);
   unsigned line_start_group = 0;
   vector<function_info *> *fns;
   unsigned filtered_line_end = !filters.empty () ? 0 : source_lines.size ();
@@ -3596,7 +3635,4 @@  output_lines (FILE *gcov_file, const source_info *src)
 	  line_start_group = 0;
 	}
     }
-
-  if (source_file)
-    fclose (source_file);
 }