diff mbox

[6/7] Real aggregate contents merge and application of deltas

Message ID 20140521131634.584457278@virgil.suse.cz
State New
Headers show

Commit Message

Martin Jambor May 21, 2014, 1:16 p.m. UTC
Hi,

the previous patch used a very simplistic merging and delta
application of aggregate contents.  This patch replaces it with a real
one.

Because there are potentially many basic blocks and the contents of a
particular aggregate are very likely to be the same for many of them,
the description of the contents are shared among BBs.  In order to
facilitate this, each block knows whether it owns a particular
description and thus whether it can change it in place or has to copy
the list (list lengths are capped at PARAM_IPA_MAX_AGG_ITEMS).

I have gathered some information on the count of aggregate jump
functions (again I should have counted only pointer and aggregate
parameters but I realized it too late, other scalars arguments cannot
have jump function aggregate items so it needlessly makes the
per-jump-function columns even more puny).  Everything is with -Ofast
-flto:

 |                    |           | Unpatched |      |           |      |          |
 | Testcase           |      Jump | Aggregate |  per |   Patched |  per | increase |
 |                    | functions |     items |   jf | agg items |   jf |        % |
 |--------------------+-----------+-----------+------+-----------+------+----------+
 | FF libxul.so       |   1756545 |     82066 | 0.05 |    102779 | 0.06 |    25.24 |
 | Tramp 3D           |     19421 |       330 | 0.02 |       577 | 0.03 |    74.85 |
 |--------------------+-----------+-----------+------+-----------+------+----------+
 | perlbench          |     28169 |        76 | 0.00 |        85 | 0.00 |    11.84 |
 | bzip2              |       858 |         0 | 0.00 |         0 | 0.00 |     0.00 |
 | gcc                |    105977 |       156 | 0.00 |       166 | 0.00 |     6.41 |
 | mcf                |       161 |         1 | 0.01 |         2 | 0.01 |   100.00 |
 | gobmk              |     28843 |        41 | 0.00 |        42 | 0.00 |     2.44 |
 | hmmer              |      5122 |        11 | 0.00 |        14 | 0.00 |    27.27 |
 | sjeng              |      2089 |         6 | 0.00 |         6 | 0.00 |     0.00 |
 | libquantum         |       820 |         0 | 0.00 |         0 | 0.00 |     0.00 |
 | h264ref            |      8316 |        30 | 0.00 |        31 | 0.00 |     3.33 |
 | omnetpp            |       738 |        19 | 0.03 |        20 | 0.03 |     5.26 |
 | xalancbmk          |    121014 |      1220 | 0.01 |      1535 | 0.01 |    25.82 |
 |--------------------+-----------+-----------+------+-----------+------+----------+
 | bwaves             |       384 |        92 | 0.24 |        97 | 0.25 |     5.43 |
 | gamess             |    194795 |     44085 | 0.23 |     45319 | 0.23 |     2.80 |
 | milc               |      3027 |         0 | 0.00 |         1 | 0.00 |   100.00 |
 | zeusmp             |      4849 |      1011 | 0.21 |      1063 | 0.22 |     5.14 |
 | gromacs            |     27421 |        57 | 0.00 |        79 | 0.00 |    38.60 |
 | cactusADM          |     15693 |       230 | 0.01 |       237 | 0.02 |     3.04 |
 | leslie3d           |      1694 |       466 | 0.28 |       466 | 0.28 |     0.00 |
 | namd               |      3050 |        12 | 0.00 |       102 | 0.03 |   750.00 |
 | soplex             |      7904 |        59 | 0.01 |       109 | 0.01 |    84.75 |
 | povray             |     23981 |       317 | 0.01 |       351 | 0.01 |    10.73 |
 | calculix           |     42451 |      6175 | 0.15 |      7228 | 0.17 |    17.05 |
 | GemsFDTD           |      5782 |      1289 | 0.22 |      1519 | 0.26 |    17.84 |
 | tonto              |     81853 |      7900 | 0.10 |      8521 | 0.10 |     7.86 |
 | lbm                |       171 |         0 | 0.00 |         0 | 0.00 |     0.00 |
 | wrf                |    121094 |      4330 | 0.04 |      4423 | 0.04 |     2.15 |
 | sphinx3            |      5880 |         9 | 0.00 |        10 | 0.00 |    11.11 |
 |--------------------+-----------+-----------+------+-----------+------+----------+
 | ac.f90             |       474 |       287 | 0.61 |       300 | 0.63 |     4.53 |
 | aermod.f90         |     33296 |      6923 | 0.21 |      6866 | 0.21 |    -0.82 |
 | air.f90            |      1010 |       366 | 0.36 |       376 | 0.37 |     2.73 |
 | capacita.f90       |       487 |        80 | 0.16 |       153 | 0.31 |    91.25 |
 | channel2.f90       |       379 |       242 | 0.64 |       242 | 0.64 |     0.00 |
 | doduc.f90          |       938 |       240 | 0.26 |       258 | 0.28 |     7.50 |
 | fatigue2.f90       |      1936 |      1251 | 0.65 |      1259 | 0.65 |     0.64 |
 | gas_dyn2.f90       |      1033 |       500 | 0.48 |       500 | 0.48 |     0.00 |
 | induct2.f90        |      3982 |      2179 | 0.55 |      2183 | 0.55 |     0.18 |
 | linpk.f90          |        85 |        12 | 0.14 |        12 | 0.14 |     0.00 |
 | mdbx.f90           |       491 |       181 | 0.37 |       204 | 0.42 |    12.71 |
 | mp_prop_design.f90 |       518 |       334 | 0.64 |       334 | 0.64 |     0.00 |
 | nf.f90             |       398 |        48 | 0.12 |        48 | 0.12 |     0.00 |
 | protein.f90        |      1322 |       909 | 0.69 |       917 | 0.69 |     0.88 |
 | rnflow.f90         |      1282 |       132 | 0.10 |       145 | 0.11 |     9.85 |
 | test_fpu2.f90      |       991 |        70 | 0.07 |        70 | 0.07 |     0.00 |
 | tfft2.f90          |        65 |        30 | 0.46 |        30 | 0.46 |     0.00 |

As you can see, jump functions are very much a Fortran thing which is
quite expected.  The increases are not quite smaller than what I had
hoped for.  The reason is that the array descriptors which are not
already handled by our current technique are not filled with constants
but with data copied from other descriptors, sometimes after checks
for non-zero values.  Therefore, in addition to these patches, they
will require more complex aggregate jump functions allowing
pass-throughs and simple arithmetic ones in addition to simple values.

Bootstrapped and tested on x86_64 where it also passes LTO bootstrap
and is able to LTO build Firefox.

Thanks,

Martin


2014-02-25  Martin Jambor  <mjambor@suse.cz>

	* ipa-prop.c (ipa_bb_info): New field own_begin_agg_cnt.
	(apply_agg_contents_deltas): New parameters fbi and own.
	Reimplemented properly.  Adjust callers.
	(ipa_analyze_bb_statements): Allocate bi->own_begin_agg_cnt.
	(free_ipa_bb_info): Free own_begin_agg_cnt.
	(prune_agg_contents): New function.
	(merge_agg_contents): New parameters fbi, inc_own and target_own,
	adjusted all callers.  Reimplemented.
	(propagate_agg_cnts_through_bb): Manage own flags.

testsuite/
	* gcc.dg/ipa/ipcp-agg-15.c: New test.
diff mbox

Patch

Index: src/gcc/ipa-prop.c
===================================================================
--- src.orig/gcc/ipa-prop.c
+++ src/gcc/ipa-prop.c
@@ -172,6 +172,10 @@  struct ipa_bb_info
   /* Aggregate contents of tracked references at the beginning of each BB.  */
   vec<ipa_known_agg_contents_list *> begin_agg_cnt;
 
+  /* True if this BB is the designated owner of the corresponding pointer in
+     begin_agg_cnt. */
+  vec<bool> own_begin_agg_cnt;
+
   /* Changes in aggregate contents of tracked references.  */
   vec<ipa_known_agg_contents_list *> agg_deltas;
 
@@ -1993,26 +1997,77 @@  determine_locally_known_aggregate_parts
     }
 }
 
-/* Apply basic block DELTAS to INITial aggregate contents description.  */
+/* Apply basic block DELTAS to INITial aggregate contents description.  FBI
+   describes the current function.  Set *OWN to true if the result is
+   exclusively held and can be modified in place.  */
 
 static struct ipa_known_agg_contents_list *
-apply_agg_contents_deltas (struct ipa_known_agg_contents_list *init,
-			   struct ipa_known_agg_contents_list *deltas)
+apply_agg_contents_deltas (struct func_body_info *fbi,
+			   struct ipa_known_agg_contents_list *init,
+			   struct ipa_known_agg_contents_list *deltas,
+			   bool *own)
 {
-  /* TODO: This over-conservative but should work for Fortran descriptors.
-     Will be replaced in a subsequent patches with real merging.  */
-
-  gcc_assert (init != AGG_CONTENTS_TOP);
-  if (deltas)
-    return deltas;
-  else
-    {
+  gcc_checking_assert (init != AGG_CONTENTS_TOP);
 #ifdef ENABLE_CHECKING
-      for (struct ipa_known_agg_contents_list *p = init; p; p = p->next)
-	gcc_assert (p->only_unescaped);
+  for (struct ipa_known_agg_contents_list *p = init; p; p = p->next)
+    gcc_assert (p->only_unescaped);
 #endif
+
+  if (!init)
+    {
+      *own = false;
+      return deltas;
+    }
+  if (!deltas)
+    {
+      *own = false;
       return init;
     }
+
+  *own = true;
+  struct ipa_known_agg_contents_list *list = NULL, **r = &list;
+  while (init || deltas)
+    {
+      struct ipa_known_agg_contents_list *p = NULL;
+      if (deltas && (!init || deltas->offset < init->offset))
+	{
+	  if (deltas->constant)
+	    p = deltas;
+	  while (init && init->offset < deltas->offset + deltas->size)
+	    init = init->next;
+	  deltas = deltas->next;
+	}
+      else if (init && (!deltas || init->offset < deltas->offset))
+	{
+	  if (init->constant
+	      && (!deltas
+		  || init->offset + init->size >= deltas->offset))
+	    p = init;
+	  init = init->next;
+	}
+      else
+	{
+	  gcc_checking_assert (init->offset == deltas->offset);
+	  if (deltas->constant)
+	    p = deltas;
+	  while (init && init->offset < deltas->offset + deltas->size)
+	    init = init->next;
+	  deltas = deltas->next;
+	}
+
+      if (p)
+	{
+	  *r = (struct ipa_known_agg_contents_list *)
+	    pool_alloc (fbi->agg_contents_pool);
+	  (*r)->offset = p->offset;
+	  (*r)->size = p->size;
+	  (*r)->constant = p->constant;
+	  (*r)->only_unescaped = p->only_unescaped;
+	  (*r)->next = NULL;
+	  r = &(*r)->next;
+	}
+    }
+  return list;
 }
 
 static tree
@@ -2264,9 +2319,10 @@  ipa_compute_jump_functions_for_edge (str
 		  struct ipa_bb_info *bi;
 		  bi = ipa_get_bb_info (fbi, gimple_bb (cs->call_stmt));
 		  struct ipa_known_agg_contents_list *begin, *final, *p;
+		  bool dummy;
 		  begin = bi->begin_agg_cnt[ref_index];
-		  final = apply_agg_contents_deltas (begin, (*dvec)[n]);
-
+		  final = apply_agg_contents_deltas (fbi, begin, (*dvec)[n],
+						     &dummy);
 		  int const_count = 0;
 		  for (p = final; p; p = p->next)
 		    if (p->constant)
@@ -2892,6 +2948,8 @@  ipa_analyze_bb_statements (struct func_b
 	      sizeof (int) * ipa_get_tracked_refs_count (fbi->info));
 
       bi->begin_agg_cnt.safe_grow (ipa_get_tracked_refs_count (fbi->info));
+      bi->own_begin_agg_cnt.safe_grow_cleared
+	(ipa_get_tracked_refs_count (fbi->info));
       for (int i = 0; i < ipa_get_tracked_refs_count (fbi->info); ++i)
 	bi->begin_agg_cnt[i] = AGG_CONTENTS_TOP;
     }
@@ -2982,6 +3040,7 @@  free_ipa_bb_info (struct ipa_bb_info *bi
   bi->cg_edges.release ();
   bi->param_aa_statuses.release ();
   bi->begin_agg_cnt.release ();
+  bi->own_begin_agg_cnt.release ();
   bi->agg_deltas.release ();
 }
 
@@ -3313,31 +3372,123 @@  gather_picked_escapes (struct func_body_
     }
 }
 
-/* Merge aggregate contents FINAL with those in *TARGET.  Return true if those
-   in *TARGET have changed.  */
+/* Remova all items from the list in *TARGET that are not also in INCOMING.
+   Return true if any was actually removed.  */
 
 static bool
-merge_agg_contents (struct ipa_known_agg_contents_list *final,
+prune_agg_contents (struct ipa_known_agg_contents_list *incoming,
 		    struct ipa_known_agg_contents_list **target)
 {
-  /* TODO: This over-conservative but should work for Fortran descriptors.
-     Will be replaced in a subsequent patches by real merging.  */
+  bool res = false;
+  while (*target && incoming)
+    {
+      if ((*target)->offset > incoming->offset)
+	{
+	  while (*target
+		 && (*target)->offset > incoming->offset
+		 && (*target)->offset < incoming->offset + incoming->size)
+	    {
+	      *target = (*target)->next;
+	      res = true;
+	    }
+
+	  incoming = incoming->next;
+	}
+      else if ((*target)->offset < incoming->offset)
+	{
+	  while (incoming
+		 && (*target)->offset < incoming->offset
+		 && (*target)->offset + (*target)->size > incoming->size)
+	    incoming = incoming->next;
+
+	  *target = (*target)->next;
+	  res = true;
+	}
+      else if ((*target)->size != incoming->size
+	       || !(*target)->constant
+	       || !incoming->constant
+	       || !operand_equal_p ((*target)->constant, incoming->constant, 0))
+	{
+	  *target = (*target)->next;
+	  incoming = incoming->next;
+	  res = true;
+	}
+      else
+	{
+	  gcc_checking_assert ((*target)->only_unescaped);
+	  target = &(*target)->next;
+	  incoming = incoming->next;
+	}
+    }
+
+  res |= *target != NULL;
+  *target = NULL;
+  return res;
+}
+
+/* Merge aggregate contents INCOMING with those in *TARGET.  Return true if
+   those in *TARGET have changed.  FBI describes the current function.  INC_OWN
+   determines whether the INCOMING list is exclusively owned and can be
+   modified in place or not.  TARGET_OWN points to a flag that describes
+   *TARGET in the same way and which may be modified.  */
+
+static bool
+merge_agg_contents (struct func_body_info *fbi,
+		    struct ipa_known_agg_contents_list *incoming, bool inc_own,
+		    struct ipa_known_agg_contents_list **target,
+		    bool *target_own)
+{
   if (*target == AGG_CONTENTS_TOP)
     {
-      *target = final;
+      *target = incoming;
+      *target_own = inc_own;
       return true;
     }
-  else if (*target != final)
+  if (*target == NULL
+      || *target == incoming)
+    return false;
+
+  if (*target_own)
+    return prune_agg_contents (incoming, target);
+
+  /* TODO: This may save some memory but is it worthwile? */
+  for (struct ipa_known_agg_contents_list *p = (*target)->next; p; p = p->next)
+    if (p == incoming)
+      {
+	*target = p;
+	return true;
+      }
+
+  struct ipa_known_agg_contents_list *p = *target, *list = NULL, **r = &list;
+  while (p && incoming)
     {
-      if (*target)
+      if (p->offset > incoming->offset)
+	incoming = incoming->next;
+      else if (p->offset < incoming->offset)
+	p = p->next;
+      else
 	{
-	  *target = NULL;
-	  return true;
+	  if (p->size == incoming->size
+	      && p->constant
+	      && incoming->constant
+	      && operand_equal_p (p->constant, incoming->constant, 0))
+	    {
+	      *r = (struct ipa_known_agg_contents_list *)
+		pool_alloc (fbi->agg_contents_pool);
+	      (*r)->offset = p->offset;
+	      (*r)->size = p->size;
+	      (*r)->constant = p->constant;
+	      (*r)->only_unescaped = true;
+	      (*r)->next = NULL;
+	      r = &(*r)->next;
+	    }
+	  p = p->next;
+	  incoming = incoming->next;
 	}
-      else
-	return false;
     }
-  return false;
+  *target_own = true;
+  *target = list;
+  return true;
 }
 
 /* Apply all computed aggregate deltas for the given BB and merge results into
@@ -3356,7 +3507,10 @@  propagate_agg_cnts_through_bb (struct fu
     else
       deltas = bi->agg_deltas[i];
 
-    final = apply_agg_contents_deltas (bi->begin_agg_cnt[i], deltas);
+    bool fin_own;
+    final = apply_agg_contents_deltas (fbi, bi->begin_agg_cnt[i], deltas,
+				       &fin_own);
+    fin_own = fin_own && single_succ_p (bb);
 
     edge e;
     edge_iterator ei;
@@ -3371,7 +3525,9 @@  propagate_agg_cnts_through_bb (struct fu
 
 	gcc_checking_assert (succ_info->begin_agg_cnt.length ()
 			     >= (unsigned) i);
-	if (merge_agg_contents (final, &succ_info->begin_agg_cnt[i])
+	if (merge_agg_contents (fbi, final, fin_own,
+				&succ_info->begin_agg_cnt[i],
+				&succ_info->own_begin_agg_cnt[i])
 	    && !succ_info->queued)
 	  {
 	    succ_info->queued = true;
Index: src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-15.c
===================================================================
--- /dev/null
+++ src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-15.c
@@ -0,0 +1,45 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-ipa-sra -fdump-ipa-cp-details"  } */
+/* { dg-add-options bind_pic_locally } */
+
+struct S
+{
+  int i,j;
+};
+
+volatile int g;
+int *p;
+
+static int __attribute__ ((noinline, noclone))
+something_unpredictable (void)
+{
+  *p = 6;
+  return 1;
+}
+
+
+static void __attribute__ ((noinline))
+bar (struct S *ps)
+{
+  something_unpredictable ();
+  g = ps->j;
+}
+
+int
+main (int argc, char **argv)
+{
+  struct S s;
+
+  s.i = 2;
+  s.j = 8;
+
+  if (something_unpredictable ())
+    s.i = 6;
+
+  bar (&s);
+
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump "Creating a specialized node of bar.*for all known contexts" "cp" } } */
+/* { dg-final { cleanup-ipa-dump "cp" } } */