diff mbox

OpenACC dimension range propagation optimization

Message ID 563A406F.7090506@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Nov. 4, 2015, 5:29 p.m. UTC
On 11/04/15 05:26, Richard Biener wrote:
> On Tue, Nov 3, 2015 at 7:11 PM, Nathan Sidwell <nathan@acm.org> wrote:
>> Richard,

> this all seems a little bit fragile and relying on implementation details?
> Is the attribute always present?  Is the call argument always a constant
> that fits in a HOST_WIDE_INT (or even int here)?  Are there always enough
> 'dims' in the tree list?  Is the 'dim' value always an INTEGER_CST that
> fits a HOST_WIDE_INT (or even an int here)?


> If so I'd like to see helper functions to hide these implementation details
> from generic code like this.

Like this?

I've added two helper functions to omp-low.c, one to get the internal fn arg
number and the other to get a dimension value, given an axis number.  omp-low
seemed the most appropriate point -- that's where the dimension processing is,
and the generation of these internal fn calls.

(Bernd, I'll fix up the dimension folding patch to use these calls before
applying it.)

ok?

nathan

Comments

Richard Biener Nov. 5, 2015, 11:32 a.m. UTC | #1
On Wed, Nov 4, 2015 at 6:29 PM, Nathan Sidwell <nathan@acm.org> wrote:
> On 11/04/15 05:26, Richard Biener wrote:
>>
>> On Tue, Nov 3, 2015 at 7:11 PM, Nathan Sidwell <nathan@acm.org> wrote:
>>>
>>> Richard,
>
>
>> this all seems a little bit fragile and relying on implementation details?
>> Is the attribute always present?  Is the call argument always a constant
>> that fits in a HOST_WIDE_INT (or even int here)?  Are there always enough
>> 'dims' in the tree list?  Is the 'dim' value always an INTEGER_CST that
>> fits a HOST_WIDE_INT (or even an int here)?
>
>
>
>> If so I'd like to see helper functions to hide these implementation
>> details
>> from generic code like this.
>
>
> Like this?
>
> I've added two helper functions to omp-low.c, one to  get the internal fn
> arg number and the other to get a dimension value, given an axis number.
> omp-low seemed the most appropriate point -- that's  where the dimension
> processing is, and the generation of these internal fn calls.
>
> (Bernd, I'll fixup the dimension folding patch to use these calls before
> applying it.)
>
> ok?

Ok

Thanks,
Richard.

> nathan
diff mbox

Patch

2015-11-04  Nathan Sidwell  <nathan@codesourcery.com>

	* target.def (goacc.dim_limit): New hook.
	* targhooks.h (default_goacc_dim_limit): Declare.
	* doc/tm.texi.in (TARGET_GOACC_DIM_LIMIT): Add.
	* doc/tm.texi: Rebuilt.
	* omp-low.h (get_oacc_fn_dim_size, get_oacc_ifn_dim_arg): Declare.
	* omp-low.c (get_oacc_fn_dim_size, get_oacc_ifn_dim_arg): New.
	(default_goacc_dim_limit): New.
	* config/nvptx/nvptx.c (PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
	(nvptx_dim_limit): New.
	(TARGET_GOACC_DIM_LIMIT): Override.
	* tree-vrp.c: Include omp-low.h, target.h.
	(extract_range_basic): Add handling for IFN_GOACC_DIM_SIZE &
	IFN_GOACC_DIM_POS.

Index: omp-low.c
===================================================================
--- omp-low.c	(revision 229757)
+++ omp-low.c	(working copy)
@@ -12095,6 +12095,41 @@  get_oacc_fn_attrib (tree fn)
   return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
 }
 
+/* Extract an oacc execution dimension from FN.  FN must be an
+   offloaded function or routine that has already had its execution
+   dimensions lowered to the target-specific values.  AXIS must be
+   in the range [0, GOMP_DIM_MAX).  */
+
+int
+get_oacc_fn_dim_size (tree fn, int axis)
+{
+  tree attrs = get_oacc_fn_attrib (fn);
+
+  /* A missing attribute or negative AXIS would run off the list.  */
+  gcc_assert (attrs && axis >= 0 && axis < GOMP_DIM_MAX);
+
+  tree dims = TREE_VALUE (attrs);
+  while (axis--)
+    dims = TREE_CHAIN (dims);
+
+  return TREE_INT_CST_LOW (TREE_VALUE (dims));
+}
+
+/* Return the axis operand of STMT, which must be a call to
+   IFN_GOACC_DIM_POS or IFN_GOACC_DIM_SIZE.  */
+
+int
+get_oacc_ifn_dim_arg (const gimple *stmt)
+{
+  internal_fn ifn = gimple_call_internal_fn (stmt);
+
+  gcc_checking_assert (ifn == IFN_GOACC_DIM_SIZE || ifn == IFN_GOACC_DIM_POS);
+  HOST_WIDE_INT axis = TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
+
+  gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX);
+  return (int) axis;
+}
+
 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
 
 static void
@@ -19383,6 +19418,18 @@  default_goacc_validate_dims (tree ARG_UN
   return changed;
 }
 
+/* Default dimension limit: unknown (zero) on an accelerator, 1 on host.  */
+
+int
+default_goacc_dim_limit (int ARG_UNUSED (axis))
+{
+#ifndef ACCEL_COMPILER
+  return 1;
+#else
+  return 0;
+#endif
+}
+
 namespace {
 
 const pass_data pass_data_oacc_device_lower =
Index: omp-low.h
===================================================================
--- omp-low.h	(revision 229757)
+++ omp-low.h	(working copy)
@@ -31,6 +31,8 @@  extern bool make_gimple_omp_edges (basic
 extern void omp_finish_file (void);
 extern tree omp_member_access_dummy_var (tree);
 extern tree get_oacc_fn_attrib (tree);
+extern int get_oacc_ifn_dim_arg (const gimple *);
+extern int get_oacc_fn_dim_size (tree, int);
 
 extern GTY(()) vec<tree, va_gc> *offload_funcs;
 extern GTY(()) vec<tree, va_gc> *offload_vars;
Index: targhooks.h
===================================================================
--- targhooks.h	(revision 229757)
+++ targhooks.h	(working copy)
@@ -110,6 +110,7 @@  extern void default_destroy_cost_data (v
 
 /* OpenACC hooks.  */
 extern bool default_goacc_validate_dims (tree, int [], int);
+extern int default_goacc_dim_limit (int);
 extern bool default_goacc_fork_join (gcall *, const int [], bool);
 
 /* These are here, and not in hooks.[ch], because not all users of
Index: doc/tm.texi
===================================================================
--- doc/tm.texi	(revision 229757)
+++ doc/tm.texi	(working copy)
@@ -5777,6 +5777,11 @@  true, if changes have been made.  You mu
 provide dimensions larger than 1.
 @end deftypefn
 
+@deftypefn {Target Hook} int TARGET_GOACC_DIM_LIMIT (int @var{axis})
+This hook should return the maximum size of a particular dimension,
+or zero if unbounded.
+@end deftypefn
+
 @deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (gcall *@var{call}, const int *@var{dims}, bool @var{is_fork})
 This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN
 function calls to target-specific gimple, or indicate whether they
Index: doc/tm.texi.in
===================================================================
--- doc/tm.texi.in	(revision 229757)
+++ doc/tm.texi.in	(working copy)
@@ -4262,6 +4262,8 @@  address;  but often a machine-dependent
 
 @hook TARGET_GOACC_VALIDATE_DIMS
 
+@hook TARGET_GOACC_DIM_LIMIT
+
 @hook TARGET_GOACC_FORK_JOIN
 
 @node Anchored Addresses
Index: tree-vrp.c
===================================================================
--- tree-vrp.c	(revision 229757)
+++ tree-vrp.c	(working copy)
@@ -55,8 +55,8 @@  along with GCC; see the file COPYING3.
 #include "tree-ssa-threadupdate.h"
 #include "tree-ssa-scopedtables.h"
 #include "tree-ssa-threadedge.h"
-
-
+#include "omp-low.h"
+#include "target.h"
 
 /* Range of values that can be associated with an SSA_NAME after VRP
    has executed.  */
@@ -3973,7 +3973,9 @@  extract_range_basic (value_range *vr, gi
   else if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
     {
       enum tree_code subcode = ERROR_MARK;
-      switch (gimple_call_internal_fn (stmt))
+      unsigned ifn_code = gimple_call_internal_fn (stmt);
+
+      switch (ifn_code)
 	{
 	case IFN_UBSAN_CHECK_ADD:
 	  subcode = PLUS_EXPR;
@@ -3984,6 +3986,28 @@  extract_range_basic (value_range *vr, gi
 	case IFN_UBSAN_CHECK_MUL:
 	  subcode = MULT_EXPR;
 	  break;
+	case IFN_GOACC_DIM_SIZE:
+	case IFN_GOACC_DIM_POS:
+	  /* Optimizing these two internal functions helps the loop
+	     optimizer eliminate outer comparisons.  Size is [1,N]
+	     and pos is [0,N-1].  */
+	  {
+	    bool is_pos = ifn_code == IFN_GOACC_DIM_POS;
+	    int axis = get_oacc_ifn_dim_arg (stmt);
+	    int size = get_oacc_fn_dim_size (current_function_decl, axis);
+
+	    if (!size)
+	      /* If it's dynamic, the backend might know a hardware
+		 limitation.  */
+	      size = targetm.goacc.dim_limit (axis);
+
+	    tree type = TREE_TYPE (gimple_call_lhs (stmt));
+	    set_value_range (vr, VR_RANGE,
+			     build_int_cst (type, is_pos ? 0 : 1),
+			     size ? build_int_cst (type, size - is_pos)
+			          : vrp_val_max (type), NULL);
+	  }
+	  return;
 	default:
 	  break;
 	}
Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c	(revision 229757)
+++ config/nvptx/nvptx.c	(working copy)
@@ -3248,6 +3248,10 @@  nvptx_file_end (void)
     }
 }
 
+/* Define dimension sizes for known hardware.  */
+#define PTX_VECTOR_LENGTH 32
+#define PTX_WORKER_LENGTH 32
+
 /* Validate compute dimensions of an OpenACC offload or routine, fill
    in non-unity defaults.  FN_LEVEL indicates the level at which a
    routine might spawn a loop.  It is negative for non-routines.  */
@@ -3264,6 +3268,25 @@  nvptx_goacc_validate_dims (tree ARG_UNUS
   return changed;
 }
 
+/* Implement TARGET_GOACC_DIM_LIMIT.  Return the maximum size of
+   dimension AXIS, or zero if it is unbounded.  */
+
+static int
+nvptx_dim_limit (int axis)
+{
+  /* Zero means no known limit.  */
+  int limit = 0;
+
+  /* Only the worker and vector axes have a fixed limit; every other
+     axis is reported as unbounded.  */
+  if (axis == GOMP_DIM_WORKER)
+    limit = PTX_WORKER_LENGTH;
+  else if (axis == GOMP_DIM_VECTOR)
+    limit = PTX_VECTOR_LENGTH;
+
+  return limit;
+}
+
 /* Determine whether fork & joins are needed.  */
 
 static bool
@@ -3376,6 +3399,9 @@  nvptx_goacc_fork_join (gcall *call, cons
 #undef TARGET_GOACC_VALIDATE_DIMS
 #define TARGET_GOACC_VALIDATE_DIMS nvptx_goacc_validate_dims
 
+#undef TARGET_GOACC_DIM_LIMIT
+#define TARGET_GOACC_DIM_LIMIT nvptx_dim_limit
+
 #undef TARGET_GOACC_FORK_JOIN
 #define TARGET_GOACC_FORK_JOIN nvptx_goacc_fork_join
 
Index: target.def
===================================================================
--- target.def	(revision 229757)
+++ target.def	(working copy)
@@ -1659,6 +1659,13 @@  bool, (tree decl, int *dims, int fn_leve
 default_goacc_validate_dims)
 
 DEFHOOK
+(dim_limit,
+"This hook should return the maximum size of a particular dimension,\n\
+or zero if unbounded.",
+int, (int axis),
+default_goacc_dim_limit)
+
+DEFHOOK
 (fork_join,
 "This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN\n\
 function calls to target-specific gimple, or indicate whether they\n\