diff mbox series

[v2] Add -ftime-report-wall

Message ID 20241005081649.2080287-1-ak@linux.intel.com
State New
Headers show
Series [v2] Add -ftime-report-wall | expand

Commit Message

Andi Kleen Oct. 5, 2024, 8:16 a.m. UTC
From: Andi Kleen <ak@gcc.gnu.org>

Time vars normally use times(2) to get the user/sys/wall time, which is always a
system call. I don't think the system time is very useful because most overhead
is in user time. If we only use the wall (or monotonic) time modern OS have an
optimized path to get it directly from a CPU instruction like RDTSC
without system call, which is much faster.

Add a -ftime-report-wall option. It actually uses the POSIX monotonic time,
so strictly it's not wall clock, but it's still a reasonable name.

Comparing the overhead with tramp3d -O0:

  ./gcc/cc1plus -quiet  ../tsrc/tramp3d-v4.i ran
    1.03 ± 0.00 times faster than ./gcc/cc1plus -quiet -ftime-report-wall ../tsrc/tramp3d-v4.i
    1.18 ± 0.00 times faster than ./gcc/cc1plus -quiet -ftime-report ../tsrc/tramp3d-v4.i

-ftime-report costs 18% (excluding the output), while -ftime-report-wall
only costs 3%, so is nearly free. So it would be feasible for some build
system to always enable it and break down the build time into passes.

With -O2 it is a bit less pronounced but still visible:

  ./gcc/cc1plus -O2 -quiet  ../tsrc/tramp3d-v4.i ran
    1.00 ± 0.00 times faster than ./gcc/cc1plus -O2 -quiet -ftime-report-wall ../tsrc/tramp3d-v4.i
    1.08 ± 0.01 times faster than ./gcc/cc1plus -O2 -quiet -ftime-report ../tsrc/tramp3d-v4.i

The drawback is that if there is context switching with other programs
the time will be overestimated, however for the common case that the
system is not oversubscribed it is more accurate because each
measurement has less overhead.

Bootstrapped on x86_64-linux with full test suite run.

gcc/ChangeLog:

	* common.opt (ftime-report-wall): Add.
	* common.opt.urls: Regenerate.
	* doc/invoke.texi: (ftime-report-wall): Document
	* gcc.cc (try_generate_repro): Check for -ftime-report-wall.
	* timevar.cc (get_time): Use clock_gettime if enabled.
	(timer::print): Print only wall time for time_report_wall.
	(make_json_for_timevar_time_def): Dito.
	* toplev.cc (toplev::start_timevars): Check for time_report_wall.

gcc/testsuite/ChangeLog:

	* g++.dg/ext/timevar3.C: New test.

---

v2: Adjust JSON/Sarif output too.
---
 gcc/common.opt                      |  4 +++
 gcc/common.opt.urls                 |  3 +++
 gcc/doc/invoke.texi                 |  7 ++++++
 gcc/gcc.cc                          |  3 ++-
 gcc/testsuite/g++.dg/ext/timevar3.C | 14 +++++++++++
 gcc/timevar.cc                      | 38 +++++++++++++++++++++++------
 gcc/toplev.cc                       |  3 ++-
 7 files changed, 62 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/timevar3.C
diff mbox series

Patch

diff --git a/gcc/common.opt b/gcc/common.opt
index 12b25ff486de..a200a8a0bc45 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3014,6 +3014,10 @@  ftime-report
 Common Var(time_report)
 Report the time taken by each compiler pass.
 
+ftime-report-wall
+Common Var(time_report_wall)
+Report the wall time taken by each compiler.
+
 ftime-report-details
 Common Var(time_report_details)
 Record times taken by sub-phases separately.
diff --git a/gcc/common.opt.urls b/gcc/common.opt.urls
index e31736cd9945..6e79a8f9390b 100644
--- a/gcc/common.opt.urls
+++ b/gcc/common.opt.urls
@@ -1378,6 +1378,9 @@  UrlSuffix(gcc/Optimize-Options.html#index-fthread-jumps)
 ftime-report
 UrlSuffix(gcc/Developer-Options.html#index-ftime-report)
 
+ftime-report-wall
+UrlSuffix(gcc/Developer-Options.html#index-ftime-report-wall)
+
 ftime-report-details
 UrlSuffix(gcc/Developer-Options.html#index-ftime-report-details)
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d38c1feb86f7..8c11d12e7521 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -784,6 +784,7 @@  Objective-C and Objective-C++ Dialects}.
 -frandom-seed=@var{string}  -fsched-verbose=@var{n}
 -fsel-sched-verbose  -fsel-sched-dump-cfg  -fsel-sched-pipelining-verbose
 -fstats  -fstack-usage  -ftime-report  -ftime-report-details
+-ftime-report-wall
 -fvar-tracking-assignments-toggle  -gtoggle
 -print-file-name=@var{library}  -print-libgcc-file-name
 -print-multi-directory  -print-multi-lib  -print-multi-os-directory
@@ -21048,6 +21049,12 @@  slightly different place within the compiler.
 @item -ftime-report-details
 Record the time consumed by infrastructure parts separately for each pass.
 
+@opindex ftime-report-wall
+@item -ftime-report-wall
+Report statistics about compiler pass time consumpion, but only using wall
+time.  This is faster than @option{-ftime-report}, but can be more
+influenced by background jobs.
+
 @opindex fira-verbose
 @item -fira-verbose=@var{n}
 Control the verbosity of the dump file for the integrated register allocator.
diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index 16fed46fb35f..8d3046eb7874 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -7964,7 +7964,8 @@  try_generate_repro (const char **argv)
        it might varry between invocations.  */
     else if (! strcmp (argv[nargs], "-quiet"))
       quiet = 1;
-    else if (! strcmp (argv[nargs], "-ftime-report"))
+    else if (! strcmp (argv[nargs], "-ftime-report")
+		   || ! strcmp (argv[nargs], "-ftime-report-wall"))
       return;
 
   if (out_arg == -1 || !quiet)
diff --git a/gcc/testsuite/g++.dg/ext/timevar3.C b/gcc/testsuite/g++.dg/ext/timevar3.C
new file mode 100644
index 000000000000..b003f37f9654
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/timevar3.C
@@ -0,0 +1,14 @@ 
+// PR c++/52248
+// { dg-options "-ftime-report-wall" }
+// { dg-allow-blank-lines-in-output 1 }
+// { dg-prune-output "Time variable" }
+// { dg-prune-output "k" }
+// { dg-prune-output " 0 " }
+// { dg-prune-output "checks" }
+// { dg-prune-output "\[0-9\]+%" }
+
+void
+foo ()
+{
+  goto lab;			// { dg-error "not defined" }
+}
diff --git a/gcc/timevar.cc b/gcc/timevar.cc
index 68bcf44864f9..4930b1175a13 100644
--- a/gcc/timevar.cc
+++ b/gcc/timevar.cc
@@ -243,6 +243,15 @@  get_time (struct timevar_time_def *now)
   now->ggc_mem = timevar_ggc_mem_total;
 
   {
+#if _POSIX_C_SOURCE >= 199309L
+    if (time_report_wall)
+      {
+	struct timespec ts;
+	clock_gettime (CLOCK_MONOTONIC, &ts);
+	now->wall = ts.tv_sec * 1000000000 + ts.tv_nsec;
+	return;
+      }
+#endif
 #ifdef USE_TIMES
     struct tms tms;
     now->wall = times (&tms)  * ticks_to_nanosec;
@@ -682,14 +691,16 @@  timer::print_row (FILE *fp,
 
 #ifdef HAVE_USER_TIME
   /* Print user-mode time for this process.  */
-  fprintf (fp, "%7.2f (%3.0f%%)",
+  if (!time_report_wall)
+    fprintf (fp, "%7.2f (%3.0f%%)",
 	   nanosec_to_floating_sec (elapsed.user),
 	   percent_of (total->user, elapsed.user));
 #endif /* HAVE_USER_TIME */
 
 #ifdef HAVE_SYS_TIME
+  if (!time_report_wall)
   /* Print system-mode time for this process.  */
-  fprintf (fp, "%7.2f (%3.0f%%)",
+    fprintf (fp, "%7.2f (%3.0f%%)",
 	   nanosec_to_floating_sec (elapsed.sys),
 	   percent_of (total->sys, elapsed.sys));
 #endif /* HAVE_SYS_TIME */
@@ -754,7 +765,10 @@  timer::print (FILE *fp)
      TIMEVAR.  */
   m_start_time = now;
 
-  fprintf (fp, "\n%-35s%16s%14s%14s%14s\n", "Time variable", "usr", "sys",
+  if (time_report_wall)
+    fprintf (fp, "\n%-35s%16s%14s\n", "Time variable", "wall", "GGC");
+  else
+    fprintf (fp, "\n%-35s%16s%14s%14s%14s\n", "Time variable", "usr", "sys",
 	   "wall", "GGC");
   if (m_jit_client_items)
     fputs ("GCC items:\n", fp);
@@ -810,13 +824,18 @@  timer::print (FILE *fp)
   /* Print total time.  */
   fprintf (fp, " %-35s:", "TOTAL");
 #ifdef HAVE_USER_TIME
-  fprintf (fp, "%7.2f      ", nanosec_to_floating_sec (total->user));
+  if (!time_report_wall)
+    fprintf (fp, "%7.2f      ", nanosec_to_floating_sec (total->user));
 #endif
 #ifdef HAVE_SYS_TIME
-  fprintf (fp, "%8.2f      ", nanosec_to_floating_sec (total->sys));
+  if (!time_report_wall)
+    fprintf (fp, "%8.2f      ", nanosec_to_floating_sec (total->sys));
 #endif
 #ifdef HAVE_WALL_TIME
-  fprintf (fp, "%8.2f      ", nanosec_to_floating_sec (total->wall));
+  if (!time_report_wall)
+    fprintf (fp, "%8.2f      ", nanosec_to_floating_sec (total->wall));
+  else
+    fprintf (fp, "%7.2f      ", nanosec_to_floating_sec (total->wall));
 #endif
   fprintf (fp, PRsa (7) "\n", SIZE_AMOUNT (total->ggc_mem));
 
@@ -842,8 +861,11 @@  json::object *
 make_json_for_timevar_time_def (const timevar_time_def &ttd)
 {
   json::object *obj = new json::object ();
-  obj->set_float ("user", nanosec_to_floating_sec (ttd.user));
-  obj->set_float ("sys", nanosec_to_floating_sec (ttd.sys));
+  if (!time_report_wall)
+    {
+      obj->set_float ("user", nanosec_to_floating_sec (ttd.user));
+      obj->set_float ("sys", nanosec_to_floating_sec (ttd.sys));
+    }
   obj->set_float ("wall", nanosec_to_floating_sec (ttd.wall));
   obj->set_integer ("ggc_mem", ttd.ggc_mem);
   return obj;
diff --git a/gcc/toplev.cc b/gcc/toplev.cc
index 5df59b79c803..87fa3f4c8833 100644
--- a/gcc/toplev.cc
+++ b/gcc/toplev.cc
@@ -2247,7 +2247,8 @@  toplev::~toplev ()
 void
 toplev::start_timevars ()
 {
-  if (time_report || !quiet_flag  || flag_detailed_statistics)
+  if (time_report || time_report_wall || !quiet_flag
+		  || flag_detailed_statistics)
     timevar_init ();
 
   timevar_start (TV_TOTAL);