diff mbox

[gomp4,WIP] Implement -foffload-alias

Message ID 56095EEF.7010700@mentor.com
State New
Headers show

Commit Message

Tom de Vries Sept. 28, 2015, 3:38 p.m. UTC
Hi,

this work-in-progress patch implements a new option 
-foffload-alias=<none|pointer|all>.


The option -foffload-alias=none instructs the compiler to assume that
objects references and pointer dereferences in an offload region do not 
alias.

The option -foffload-alias=pointer instructs the compiler to assume that 
objects references in an offload region do not alias.

The option -foffload-alias=all instructs the compiler to make no 
assumptions about aliasing in offload regions.

The default value is -foffload-alias=none.


The patch works by adding restrict to the types of the fields used to 
pass data to an offloading region.

Atm, the kernels-loop-offload-alias-ptr.c test-case passes, but the 
kernels-loop-offload-alias-none.c test-case fails.  For the latter, the 
required amount of restrict is added, but it has no effect. I've 
reported this in a more basic form in PR67742: "3rd-level restrict ignored".

Thanks,
- Tom
diff mbox

Patch

Implement -foffload-alias

2015-09-28  Tom de Vries  <tom@codesourcery.com>

	* common.opt (foffload-alias): New option.
	* flag-types.h (enum offload_alias): New enum.
	* omp-low.c (is_gimple_oacc_offload): New function.
	(install_var_field): Handle flag_offload_alias.
	* doc/invoke.texi (@item Code Generation Options): Add -foffload-alias.
	(@item -foffload-alias): New item.

	* c-c++-common/goacc/kernels-loop-offload-alias-none.c: New test.
	* c-c++-common/goacc/kernels-loop-offload-alias-ptr.c: New test.
---
 gcc/common.opt                                     | 16 ++++++
 gcc/doc/invoke.texi                                | 11 ++++
 gcc/flag-types.h                                   |  7 +++
 gcc/omp-low.c                                      | 24 ++++++++-
 .../goacc/kernels-loop-offload-alias-none.c        | 63 ++++++++++++++++++++++
 .../goacc/kernels-loop-offload-alias-ptr.c         | 52 ++++++++++++++++++
 6 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c

diff --git a/gcc/common.opt b/gcc/common.opt
index 290b6b3..28977a4 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1730,6 +1730,22 @@  Enum(offload_abi) String(ilp32) Value(OFFLOAD_ABI_ILP32)
 EnumValue
 Enum(offload_abi) String(lp64) Value(OFFLOAD_ABI_LP64)
 
+foffload-alias=
+Common Joined RejectNegative Enum(offload_alias) Var(flag_offload_alias) Init(OFFLOAD_ALIAS_NONE)
+-foffload-alias=[all|pointer|none]     Assume non-aliasing in an offload region
+
+Enum
+Name(offload_alias) Type(enum offload_alias) UnknownError(unknown offload aliasing %qs)
+
+EnumValue
+Enum(offload_alias) String(all) Value(OFFLOAD_ALIAS_ALL)
+
+EnumValue
+Enum(offload_alias) String(pointer) Value(OFFLOAD_ALIAS_POINTER)
+
+EnumValue
+Enum(offload_alias) String(none) Value(OFFLOAD_ALIAS_NONE)
+
 fomit-frame-pointer
 Common Report Var(flag_omit_frame_pointer) Optimization
 When possible do not generate stack frames
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 909a453..a5ab785 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1136,6 +1136,7 @@  See S/390 and zSeries Options.
 -finstrument-functions-exclude-function-list=@var{sym},@var{sym},@dots{} @gol
 -finstrument-functions-exclude-file-list=@var{file},@var{file},@dots{} @gol
 -fno-common  -fno-ident @gol
+-foffload-alias=@r{[}none@r{|}pointer@r{|}all@r{]} @gol
 -fpcc-struct-return  -fpic  -fPIC -fpie -fPIE -fno-plt @gol
 -fno-jump-tables @gol
 -frecord-gcc-switches @gol
@@ -23695,6 +23696,16 @@  The options @option{-ftrapv} and @option{-fwrapv} override each other, so using
 using @option{-ftrapv} @option{-fwrapv} @option{-fno-wrapv} on the command-line
 results in @option{-ftrapv} being effective.
 
+@item -foffload-alias=@r{[}none@r{|}pointer@r{|}all@r{]}
+@opindex -foffload-alias
+The option @option{-foffload-alias=none} instructs the compiler to assume that
+objects references and pointer dereferences in an offload region do not alias.
+The option @option{-foffload-alias=pointer} instruct the compiler to assume that
+objects references in an offload region are presumed unaliased.  The option
+@option{-foffload-alias=all} instructs the compiler to make no assumtions about
+aliasing in offload regions.  The default value is
+@option{-foffload-alias=none}.
+
 @item -fexceptions
 @opindex fexceptions
 Enable exception handling.  Generates extra code needed to propagate
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index ac9ca0b..e8e672d 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -286,5 +286,12 @@  enum gfc_convert
   GFC_FLAG_CONVERT_LITTLE
 };
 
+enum offload_alias
+{
+  OFFLOAD_ALIAS_ALL,
+  OFFLOAD_ALIAS_POINTER,
+  OFFLOAD_ALIAS_NONE
+};
+
 
 #endif /* ! GCC_FLAG_TYPES_H */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index a5904eb..9cbba1f 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -1140,6 +1140,16 @@  use_pointer_for_field (tree decl, omp_context *shared_ctx)
   return false;
 }
 
+static bool
+is_gimple_oacc_offload (const gimple *stmt)
+{
+  return (gimple_code (stmt) == GIMPLE_OMP_TARGET
+	  && (((gimple_omp_target_kind (stmt)
+		== GF_OMP_TARGET_KIND_OACC_PARALLEL)
+	       || (gimple_omp_target_kind (stmt)
+		   == GF_OMP_TARGET_KIND_OACC_KERNELS))));
+}
+
 /* Construct a new automatic decl similar to VAR.  */
 
 static tree
@@ -1283,6 +1293,7 @@  static void
 install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
 {
   tree field, type, sfield = NULL_TREE;
+  bool in_oacc_offload = is_gimple_oacc_offload (ctx->stmt);
 
   gcc_assert ((mask & 1) == 0
 	      || !splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
@@ -1298,7 +1309,18 @@  install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
       type = build_pointer_type (build_pointer_type (type));
     }
   else if (by_ref)
-    type = build_pointer_type (type);
+    {
+      if (in_oacc_offload
+	  && flag_offload_alias == OFFLOAD_ALIAS_NONE
+	  && POINTER_TYPE_P (type))
+	type = build_qualified_type (type, TYPE_QUAL_RESTRICT);
+
+      type = build_pointer_type (type);
+
+      if (in_oacc_offload
+	  && flag_offload_alias != OFFLOAD_ALIAS_ALL)
+	type = build_qualified_type (type, TYPE_QUAL_RESTRICT);
+    }
   else if ((mask & 3) == 1 && is_reference (var))
     type = TREE_TYPE (type);
 
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
new file mode 100644
index 0000000..918bced
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
@@ -0,0 +1,63 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+/* { dg-additional-options "-fdump-tree-alias-all" } */
+/* { dg-additional-options "-foffload-alias=none" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+  unsigned int *a;
+  unsigned int *b;
+  unsigned int *c;
+
+  a = (unsigned int *)malloc (N * sizeof (unsigned int));
+  b = (unsigned int *)malloc (N * sizeof (unsigned int));
+  c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    a[i] = i * 2;
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+    for (COUNTERTYPE ii = 0; ii < N; ii++)
+      c[ii] = a[ii] + b[ii];
+  }
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    if (c[i] != a[i] + b[i])
+      abort ();
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
+
+/* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 5" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 6" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 7" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 9 "alias" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c
new file mode 100644
index 0000000..029d77c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c
@@ -0,0 +1,52 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+/* { dg-additional-options "-fdump-tree-alias-all" } */
+/* { dg-additional-options "-foffload-alias=pointer" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+
+int
+main (void)
+{
+  for (COUNTERTYPE i = 0; i < N; i++)
+    a[i] = i * 2;
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+    for (COUNTERTYPE ii = 0; ii < N; ii++)
+      c[ii] = a[ii] + b[ii];
+  }
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    if (c[i] != a[i] + b[i])
+      abort ();
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
+
+/* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 6 "alias" } } */
-- 
1.9.1