diff mbox

Recognize vec_perm_expr in a constructor of bit_field_ref

Message ID alpine.DEB.2.02.1209111713360.28628@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse Sept. 11, 2012, 3:15 p.m. UTC
On Tue, 11 Sep 2012, Richard Guenther wrote:

> On Tue, Sep 11, 2012 at 1:07 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> Hello,
>>
>> here is a patch that turns {v[1],v[0]} into vec_perm_expr(v,v,{1,0}) if the
>> target is ok with it.
>>
>> I am attaching 2 versions of the patch. p-good is the one that passes
>> testing. p-bad, where I rely on fold_stmt to detect identity permutations,
>> ICEs towards the end of the pass while checking a bogus gimple stmt (one
>> that gimple_debug_stmt crashes on if I call it in gdb). From a performance
>> point of view, p-good makes sense, but I liked the simplicity of p-bad and I
>> am confused as to why it fails.
>
> Probably because you cannot simply increase num_ops ...
>
>> 2012-09-11  Marc Glisse  <marc.glisse@inria.fr>
>>
>> gcc/
>>         * tree-ssa-forwprop.c (simplify_vector_constructor): New function.
>>         (ssa_forward_propagate_and_combine): Call it.
>>
>> gcc/testsuite/
>>         * gcc.dg/tree-ssa/forwprop-22.c: New testcase.
[...]
> Ok with that change.

Attached is what I am testing and will commit if it passes.
diff mbox

Patch

Index: gcc/tree-ssa-forwprop.c
===================================================================
--- gcc/tree-ssa-forwprop.c	(revision 191187)
+++ gcc/tree-ssa-forwprop.c	(working copy)
@@ -26,20 +26,21 @@  along with GCC; see the file COPYING3.
 #include "tm_p.h"
 #include "basic-block.h"
 #include "gimple-pretty-print.h"
 #include "tree-flow.h"
 #include "tree-pass.h"
 #include "langhooks.h"
 #include "flags.h"
 #include "gimple.h"
 #include "expr.h"
 #include "cfgloop.h"
+#include "tree-vectorizer.h"
 
 /* This pass propagates the RHS of assignment statements into use
    sites of the LHS of the assignment.  It's basically a specialized
    form of tree combination.   It is hoped all of this can disappear
    when we have a generalized tree combiner.
 
    One class of common cases we handle is forward propagating a single use
    variable into a COND_EXPR.
 
      bb0:
@@ -2787,20 +2788,98 @@  simplify_permutation (gimple_stmt_iterat
       if (TREE_CODE (op0) == SSA_NAME)
 	ret = remove_prop_source_from_use (op0);
       if (op0 != op1 && TREE_CODE (op1) == SSA_NAME)
 	ret |= remove_prop_source_from_use (op1);
       return ret ? 2 : 1;
     }
 
   return 0;
 }
 
+/* Recognize a VEC_PERM_EXPR.  Returns true if there were any changes.  */
+
+static bool
+simplify_vector_constructor (gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  gimple def_stmt;
+  tree op, op2, orig, type, elem_type;
+  unsigned elem_size, nelts, i;
+  enum tree_code code;
+  constructor_elt *elt;
+  unsigned char *sel;
+  bool maybe_ident;
+
+  gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
+
+  op = gimple_assign_rhs1 (stmt);
+  type = TREE_TYPE (op);
+  gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE);
+
+  nelts = TYPE_VECTOR_SUBPARTS (type);
+  elem_type = TREE_TYPE (type);
+  elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
+
+  sel = XALLOCAVEC (unsigned char, nelts);
+  orig = NULL;
+  maybe_ident = true;
+  FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt)
+    {
+      tree ref, op1;
+
+      if (i >= nelts)
+	return false;
+
+      if (TREE_CODE (elt->value) != SSA_NAME)
+	return false;
+      def_stmt = SSA_NAME_DEF_STMT (elt->value);
+      if (!def_stmt || !is_gimple_assign (def_stmt))
+	return false;
+      code = gimple_assign_rhs_code (def_stmt);
+      if (code != BIT_FIELD_REF)
+	return false;
+      op1 = gimple_assign_rhs1 (def_stmt);
+      ref = TREE_OPERAND (op1, 0);
+      if (orig)
+	{
+	  if (ref != orig)
+	    return false;
+	}
+      else
+	{
+	  if (TREE_CODE (ref) != SSA_NAME)
+	    return false;
+	  orig = ref;
+	}
+      if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size)
+	return false;
+      sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size;
+      if (sel[i] != i) maybe_ident = false;
+    }
+  if (i < nelts)
+    return false;
+
+  if (maybe_ident)
+    {
+      gimple_assign_set_rhs_from_tree (gsi, orig);
+    }
+  else
+    {
+      op2 = vect_gen_perm_mask (type, sel);
+      if (!op2)
+	return false;
+      gimple_assign_set_rhs_with_ops_1 (gsi, VEC_PERM_EXPR, orig, orig, op2);
+    }
+  update_stmt (gsi_stmt (*gsi));
+  return true;
+}
+
 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
 static unsigned int
 ssa_forward_propagate_and_combine (void)
 {
   basic_block bb;
   unsigned int todoflags = 0;
 
   cfg_changed = false;
@@ -2958,20 +3037,23 @@  ssa_forward_propagate_and_combine (void)
 		  }
 		else if (code == VEC_PERM_EXPR)
 		  {
 		    int did_something = simplify_permutation (&gsi);
 		    if (did_something == 2)
 		      cfg_changed = true;
 		    changed = did_something != 0;
 		  }
 		else if (code == BIT_FIELD_REF)
 		  changed = simplify_bitfield_ref (&gsi);
+                else if (code == CONSTRUCTOR
+                         && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE)
+                  changed = simplify_vector_constructor (&gsi);
 		break;
 	      }
 
 	    case GIMPLE_SWITCH:
 	      changed = simplify_gimple_switch (stmt);
 	      break;
 
 	    case GIMPLE_COND:
 	      {
 		int did_something;
Index: gcc/testsuite/gcc.dg/tree-ssa/forwprop-22.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/forwprop-22.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/forwprop-22.c	(revision 0)
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_perm } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+typedef double vec __attribute__((vector_size (2 * sizeof (double))));
+void f (vec *px, vec *y, vec *z)
+{
+  vec x = *px;
+  vec t1 = { x[1], x[0] };
+  vec t2 = { x[0], x[1] };
+  *y = t1;
+  *z = t2;
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/forwprop-22.c
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Author Date Id Revision URL

Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 191187)
+++ gcc/Makefile.in	(working copy)
@@ -2237,21 +2237,22 @@  tree-outof-ssa.o : tree-outof-ssa.c $(TR
    $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \
    $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \
    $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H)
 tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \
    $(GIMPLE_PRETTY_PRINT_H) langhooks.h
 tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
-   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H)
+   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \
+   $(TREE_VECTORIZER_H)
 tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
    langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H)
 tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \
    coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
    $(TREE_PRETTY_PRINT_H)
 tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \