diff mbox

[gomp4] Unidirectional branches for nvptx

Message ID 555C7A8B.10304@codesourcery.com
State New
Headers show

Commit Message

Bernd Schmidt May 20, 2015, 12:14 p.m. UTC
This adds functionality to the nvptx backend to emit uni-directional 
branches. The idea is to recognize the previously introduced 
warp-broadcast pattern; we know that its result is constant across an 
entire warp of threads, so any value based on that result has the same 
property. If a jump condition is constant across a warp, add ".uni".

Committed on gomp-4_0-branch.


Bernd
diff mbox

Patch

Index: gcc/ChangeLog.gomp
===================================================================
--- gcc/ChangeLog.gomp	(revision 223443)
+++ gcc/ChangeLog.gomp	(working copy)
@@ -1,3 +1,13 @@ 
+2015-05-20  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* config/nvptx/nvptx.c: Include "dumpfile.h".
+	(condition_unidirectional_p): New static function.
+	(nvptx_print_operand): Use it for new 'U' handling.
+	(nvptx_reorg): Compute warp_equal_pseudos.
+	* config/nvptx/nvptx.h (struct machine_function): New field
+	warp_equal_pseudos.
+	* config/nvptx/nvptx.md (br_true, br_false): Add %U modifier.
+
 2015-05-19  Bernd Schmidt  <bernds@codesourcery.com>
 
 	* omp-builtins.def (GOACC_thread_broadcast,
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 223443)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -72,6 +72,7 @@ 
 #include "cfgrtl.h"
 #include "stor-layout.h"
 #include "df.h"
+#include "dumpfile.h"
 #include "builtins.h"
 
 /* Record the function decls we've written, and the libfuncs and function
@@ -1646,6 +1647,23 @@  nvptx_print_operand_address (FILE *file,
   nvptx_print_address_operand (file, addr, VOIDmode);
 }
 
+/* Return true if COND is a constant, a REG flagged in warp_equal_pseudos, or
+   a comparison of such, i.e. its value is equal in all threads of a warp.  */
+
+static bool
+condition_unidirectional_p (rtx cond)
+{
+  if (CONSTANT_P (cond))
+    return true;
+  if (GET_CODE (cond) == REG)
+    return cfun->machine->warp_equal_pseudos[REGNO (cond)];
+  if (GET_RTX_CLASS (GET_CODE (cond)) == RTX_COMPARE
+      || GET_RTX_CLASS (GET_CODE (cond)) == RTX_COMM_COMPARE)
+    return (condition_unidirectional_p (XEXP (cond, 0))
+	    && condition_unidirectional_p (XEXP (cond, 1)));
+  return false;
+}
+
 /* Print an operand, X, to FILE, with an optional modifier in CODE.
 
    Meaning of CODE:
@@ -1659,7 +1677,9 @@  nvptx_print_operand_address (FILE *file,
    f -- print a full reg even for something that must always be split
    t -- print a type opcode suffix, promoting QImode to 32 bits
    T -- print a type size in bits
-   u -- print a type opcode suffix without promotions.  */
+   u -- print a type opcode suffix without promotions.
+   U -- print ".uni" if a condition consists only of values equal across all
+        threads in a warp.  */
 
 static void
 nvptx_print_operand (FILE *file, rtx x, int code)
@@ -1732,6 +1752,11 @@  nvptx_print_operand (FILE *file, rtx x,
       fprintf (file, "@!");
       goto common;
 
+    case 'U':
+      if (condition_unidirectional_p (x))
+	fprintf (file, ".uni");
+      break;
+
     case 'c':
       op_mode = GET_MODE (XEXP (x, 0));
       switch (x_code)
@@ -1899,6 +1924,12 @@  nvptx_reorg (void)
 
   df_clear_flags (DF_LR_RUN_DCE);
   df_analyze ();
+  regstat_init_n_sets_and_refs ();
+  int max_regs = max_reg_num ();
+
+  for (int i = LAST_VIRTUAL_REGISTER + 1; i < max_regs; i++)
+    if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0)
+      regno_reg_rtx[i] = const0_rtx;
 
   thread_prologue_and_epilogue_insns ();
 
@@ -1911,6 +1942,11 @@  nvptx_reorg (void)
   siregs.mode = SImode;
   diregs.mode = DImode;
 
+  cfun->machine->warp_equal_pseudos
+    = ggc_cleared_vec_alloc<char> (max_regs);
+
+  auto_vec<unsigned> warp_reg_worklist;
+
   for (insn = get_insns (); insn; insn = next)
     {
       next = NEXT_INSN (insn);
@@ -1919,11 +1955,25 @@  nvptx_reorg (void)
 	  || GET_CODE (PATTERN (insn)) == USE
 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
 	continue;
+
       qiregs.n_in_use = 0;
       hiregs.n_in_use = 0;
       siregs.n_in_use = 0;
       diregs.n_in_use = 0;
       extract_insn (insn);
+
+      if (recog_memoized (insn) == CODE_FOR_oacc_thread_broadcastsi
+	  || (GET_CODE (PATTERN (insn)) == SET
+	      && CONSTANT_P (SET_SRC (PATTERN (insn)))))
+	{
+	  rtx dest = recog_data.operand[0];
+	  if (REG_P (dest) && REG_N_SETS (REGNO (dest)) == 1)
+	    {
+	      cfun->machine->warp_equal_pseudos[REGNO (dest)] = true;
+	      warp_reg_worklist.safe_push (REGNO (dest));
+	    }
+	}
+
       enum attr_subregs_ok s_ok = get_attr_subregs_ok (insn);
       for (int i = 0; i < recog_data.n_operands; i++)
 	{
@@ -1978,12 +2028,55 @@  nvptx_reorg (void)
 	}
     }
 
-  int maxregs = max_reg_num ();
-  regstat_init_n_sets_and_refs ();
+  while (!warp_reg_worklist.is_empty ())
+    {
+      int regno = warp_reg_worklist.pop ();
+      
+      df_ref use = DF_REG_USE_CHAIN (regno);
+      for (; use; use = DF_REF_NEXT_REG (use))
+	{
+	  rtx_insn *insn;
+	  if (!DF_REF_INSN_INFO (use))
+	    continue;
+	  insn = DF_REF_INSN (use);
+	  if (DEBUG_INSN_P (insn))
+	    continue;
 
-  for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
-    if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0)
-      regno_reg_rtx[i] = const0_rtx;
+	  /* The only insns we have to exclude are those which refer to
+	     memory.  */
+	  rtx pat = PATTERN (insn);
+	  if (GET_CODE (pat) == SET
+	      && (MEM_P (SET_SRC (pat)) || MEM_P (SET_DEST (pat))))
+	    continue;
+
+	  df_ref insn_use;
+	  bool all_equal = true;
+	  FOR_EACH_INSN_USE (insn_use, insn)
+	    {
+	      unsigned insn_regno = DF_REF_REGNO (insn_use);
+	      if (!cfun->machine->warp_equal_pseudos[insn_regno])
+		{
+		  all_equal = false;
+		  break;
+		}
+	    }
+	  if (!all_equal)
+	    continue;
+	  df_ref insn_def;
+	  FOR_EACH_INSN_DEF (insn_def, insn)
+	    {
+	      unsigned dregno = DF_REF_REGNO (insn_def);
+	      if (cfun->machine->warp_equal_pseudos[dregno])
+		continue;
+	      cfun->machine->warp_equal_pseudos[dregno] = true;
+	      warp_reg_worklist.safe_push (dregno);
+	    }
+	}
+    }
+  if (dump_file)
+    for (int i = 0; i < max_regs; i++)
+      if (cfun->machine->warp_equal_pseudos[i])
+	fprintf (dump_file, "Found warp invariant pseudo %d\n", i);
   regstat_free_n_sets_and_refs ();
 }
 
Index: gcc/config/nvptx/nvptx.h
===================================================================
--- gcc/config/nvptx/nvptx.h	(revision 223442)
+++ gcc/config/nvptx/nvptx.h	(working copy)
@@ -235,6 +235,7 @@  struct nvptx_pseudo_info
 struct GTY(()) machine_function
 {
   rtx_expr_list *call_args;
+  char *warp_equal_pseudos;
   rtx start_call;
   tree funtype;
   bool has_call_with_varargs;
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md	(revision 223443)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -814,7 +814,7 @@  (define_insn "br_true"
 		      (label_ref (match_operand 1 "" ""))
 		      (pc)))]
   ""
-  "%j0\\tbra\\t%l1;")
+  "%j0\\tbra%U0\\t%l1;")
 
 (define_insn "br_false"
   [(set (pc)
@@ -823,7 +823,7 @@  (define_insn "br_false"
 		      (label_ref (match_operand 1 "" ""))
 		      (pc)))]
   ""
-  "%J0\\tbra\\t%l1;")
+  "%J0\\tbra%U0\\t%l1;")
 
 (define_expand "cbranch<mode>4"
   [(set (pc)