diff mbox

[PR46032] Handle BUILT_IN_GOMP_PARALLEL in ipa-pta

Message ID 565C0F47.5020604@mentor.com
State New
Headers show

Commit Message

Tom de Vries Nov. 30, 2015, 8:56 a.m. UTC
Hi,

this patch fixes PR46032.

It handles a call:
...
   __builtin_GOMP_parallel (fn, data, num_threads, flags)
...
as:
...
   fn (data)
...
in ipa-pta.

This improves ipa-pta alias analysis in the parallelized function fn, 
and allows vectorization in the testcase without a runtime alias test.

Bootstrapped and reg-tested on x86_64.

OK for stage3 trunk?

Thanks,
- Tom

Comments

Richard Biener Nov. 30, 2015, 9:16 a.m. UTC | #1
On Mon, 30 Nov 2015, Tom de Vries wrote:

> Hi,
> 
> this patch fixes PR46032.
> 
> It handles a call:
> ...
>   __builtin_GOMP_parallel (fn, data, num_threads, flags)
> ...
> as:
> ...
>   fn (data)
> ...
> in ipa-pta.
> 
> This improves ipa-pta alias analysis in the parallelized function fn, and
> allows vectorization in the testcase without a runtime alias test.
> 
> Bootstrapped and reg-tested on x86_64.
> 
> OK for stage3 trunk?

+             /* Assign the passed argument to the appropriate incoming
+                parameter of the function.  */
+             struct constraint_expr lhs ;
+             lhs = get_function_part_constraint (fi, fi_parm_base + 0);
+             auto_vec<ce_s, 2> rhsc;
+             struct constraint_expr *rhsp;
+             get_constraint_for_rhs (arg, &rhsc);
+             while (rhsc.length () != 0)
+               {
+                 rhsp = &rhsc.last ();
+                 process_constraint (new_constraint (lhs, *rhsp));
+                 rhsc.pop ();
+               }

please use style used elsewhere with

             FOR_EACH_VEC_ELT (rhsc, j, rhsp)
               process_constraint (new_constraint (lhs, *rhsp));
             rhsc.truncate (0);

+             /* Parameter passed by value is used.  */
+             lhs = get_function_part_constraint (fi, fi_uses);
+             struct constraint_expr *rhsp;
+             get_constraint_for_address_of (arg, &rhsc);

This isn't correct - you want to use get_constraint_for (arg, &rhsc).
After all rhs is already an ADDR_EXPR.

+             FOR_EACH_VEC_ELT (rhsc, j, rhsp)
+               process_constraint (new_constraint (lhs, *rhsp));
+             rhsc.truncate (0);
+
+             /* The caller clobbers what the callee does.  */
+             lhs = get_function_part_constraint (fi, fi_clobbers);
+             rhs = get_function_part_constraint (cfi, fi_clobbers);
+             process_constraint (new_constraint (lhs, rhs));
+
+             /* The caller uses what the callee does.  */
+             lhs = get_function_part_constraint (fi, fi_uses);
+             rhs = get_function_part_constraint (cfi, fi_uses);
+             process_constraint (new_constraint (lhs, rhs));

I don't see why you need those.  The solver should compute these
in even better precision (context sensitive on the call side).

The same is true for the function parameter.  That is, the only
needed part of the patch should be that making sure we see
the "direct" call and assign parameters correctly.

Richard.
diff mbox

Patch

Handle BUILT_IN_GOMP_PARALLEL in pta

2015-11-30  Tom de Vries  <tom@codesourcery.com>

	PR tree-optimization/46032
	* tree-ssa-structalias.c (find_func_aliases_for_builtin_call)
	(find_func_clobbers): Handle BUILT_IN_GOMP_PARALLEL.
	(ipa_pta_execute): Same.  Handle node->parallelized_function as a local
	function.

	* gcc.dg/pr46032.c: New test.

	* testsuite/libgomp.c/pr46032.c: New test.

---
 gcc/testsuite/gcc.dg/pr46032.c        | 47 ++++++++++++++++++++++
 gcc/tree-ssa-structalias.c            | 73 ++++++++++++++++++++++++++++++++++-
 libgomp/testsuite/libgomp.c/pr46032.c | 44 +++++++++++++++++++++
 3 files changed, 162 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr46032.c b/gcc/testsuite/gcc.dg/pr46032.c
new file mode 100644
index 0000000..b91190e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr46032.c
@@ -0,0 +1,47 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fopenmp -ftree-vectorize -std=c99 -fipa-pta -fdump-tree-vect-all" } */
+
+extern void abort (void);
+
+#define nEvents 1000
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+init (unsigned *results, unsigned *pData)
+{
+  unsigned int i;
+  for (i = 0; i < nEvents; ++i)
+    pData[i] = i % 3;
+}
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+check (unsigned *results)
+{
+  unsigned sum = 0;
+  for (int idx = 0; idx < (int)nEvents; idx++)
+    sum += results[idx];
+
+  if (sum != 1998)
+    abort ();
+}
+
+int
+main (void)
+{
+  unsigned results[nEvents];
+  unsigned pData[nEvents];
+  unsigned coeff = 2;
+
+  init (&results[0], &pData[0]);
+
+#pragma omp parallel for
+  for (int idx = 0; idx < (int)nEvents; idx++)
+    results[idx] = coeff * pData[idx];
+
+  check (&results[0]);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "versioning for alias required" "vect" } } */
+
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index f24ebeb..3fe538b 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -4488,6 +4488,39 @@  find_func_aliases_for_builtin_call (struct function *fn, gcall *t)
 	    }
 	  return true;
 	}
+      case BUILT_IN_GOMP_PARALLEL:
+	{
+	  /* Handle
+	       __builtin_GOMP_parallel (fn, data, num_threads, flags).  */
+	  if (in_ipa_mode)
+	    {
+	      tree fnarg = gimple_call_arg (t, 0);
+	      gcc_assert (TREE_CODE (fnarg) == ADDR_EXPR);
+	      tree fndecl = TREE_OPERAND (fnarg, 0);
+	      varinfo_t fi = get_vi_for_tree (fndecl);
+	      tree arg = gimple_call_arg (t, 1);
+	      gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
+
+	      /* Assign the passed argument to the appropriate incoming
+		 parameter of the function.  */
+	      struct constraint_expr lhs ;
+	      lhs = get_function_part_constraint (fi, fi_parm_base + 0);
+	      auto_vec<ce_s, 2> rhsc;
+	      struct constraint_expr *rhsp;
+	      get_constraint_for_rhs (arg, &rhsc);
+	      while (rhsc.length () != 0)
+		{
+		  rhsp = &rhsc.last ();
+		  process_constraint (new_constraint (lhs, *rhsp));
+		  rhsc.pop ();
+		}
+
+	      return true;
+	    }
+	  /* Else fallthru to generic handling which will let
+	     the frame escape.  */
+	  break;
+	}
       /* printf-style functions may have hooks to set pointers to
 	 point to somewhere into the generated string.  Leave them
 	 for a later exercise...  */
@@ -5036,6 +5069,37 @@  find_func_clobbers (struct function *fn, gimple *origt)
 	  case BUILT_IN_VA_START:
 	  case BUILT_IN_VA_END:
 	    return;
+	  case BUILT_IN_GOMP_PARALLEL:
+	    {
+	      /* Handle
+		   __builtin_GOMP_parallel (fn, data, num_threads, flags).  */
+	      tree fnarg = gimple_call_arg (t, 0);
+	      gcc_assert (TREE_CODE (fnarg) == ADDR_EXPR);
+	      tree fndecl = TREE_OPERAND (fnarg, 0);
+	      varinfo_t cfi = get_vi_for_tree (fndecl);
+	      tree arg = gimple_call_arg (t, 1);
+	      gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
+
+	      /* Parameter passed by value is used.  */
+	      lhs = get_function_part_constraint (fi, fi_uses);
+	      struct constraint_expr *rhsp;
+	      get_constraint_for_address_of (arg, &rhsc);
+	      FOR_EACH_VEC_ELT (rhsc, j, rhsp)
+		process_constraint (new_constraint (lhs, *rhsp));
+	      rhsc.truncate (0);
+
+	      /* The caller clobbers what the callee does.  */
+	      lhs = get_function_part_constraint (fi, fi_clobbers);
+	      rhs = get_function_part_constraint (cfi, fi_clobbers);
+	      process_constraint (new_constraint (lhs, rhs));
+
+	      /* The caller uses what the callee does.  */
+	      lhs = get_function_part_constraint (fi, fi_uses);
+	      rhs = get_function_part_constraint (cfi, fi_uses);
+	      process_constraint (new_constraint (lhs, rhs));
+
+	      return;
+	    }
 	  /* printf-style functions may have hooks to set pointers to
 	     point to somewhere into the generated string.  Leave them
 	     for a later exercise...  */
@@ -7352,7 +7416,8 @@  ipa_pta_execute (void)
       bool nonlocal_p = (node->used_from_other_partition
 			 || node->externally_visible
 			 || node->force_output
-			 || node->address_taken);
+			 || (node->address_taken
+			     && !node->parallelized_function));
 
       vi = create_function_info_for (node->decl,
 				     alias_get_name (node->decl), false,
@@ -7504,7 +7569,11 @@  ipa_pta_execute (void)
 		continue;
 
 	      /* Handle direct calls to functions with body.  */
-	      decl = gimple_call_fndecl (stmt);
+	      if (gimple_call_builtin_p (stmt, BUILT_IN_GOMP_PARALLEL))
+		decl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
+	      else
+		decl = gimple_call_fndecl (stmt);
+
 	      if (decl
 		  && (fi = lookup_vi_for_tree (decl))
 		  && fi->is_fn_info)
diff --git a/libgomp/testsuite/libgomp.c/pr46032.c b/libgomp/testsuite/libgomp.c/pr46032.c
new file mode 100644
index 0000000..2178aa7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr46032.c
@@ -0,0 +1,44 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -std=c99 -fipa-pta" } */
+
+
+extern void abort (void);
+
+#define nEvents 1000
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+init (unsigned *results, unsigned *pData)
+{
+  unsigned int i;
+  for (i = 0; i < nEvents; ++i)
+    pData[i] = i % 3;
+}
+
+static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize")))
+check (unsigned *results)
+{
+  unsigned sum = 0;
+  for (int idx = 0; idx < (int)nEvents; idx++)
+    sum += results[idx];
+
+  if (sum != 1998)
+    abort ();
+}
+
+int
+main (void)
+{
+  unsigned results[nEvents];
+  unsigned pData[nEvents];
+  unsigned coeff = 2;
+
+  init (&results[0], &pData[0]);
+
+#pragma omp parallel for
+  for (int idx = 0; idx < (int)nEvents; idx++)
+    results[idx] = coeff * pData[idx];
+
+  check (&results[0]);
+
+  return 0;
+}