diff mbox

combine permutations in gimple

Message ID alpine.DEB.2.02.1208181146450.5811@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse Aug. 18, 2012, 9:59 a.m. UTC
Hello,

here is a new patch (passes bootstrap+regtest), which only combines
permutations if the result is the identity. I'll file a PR about more general
combinations to link to this conversation and the cost hook proposition.

Ok?


2012-08-18  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* fold-const.c (fold_ternary_loc): Detect identity permutations.
 	Canonicalize permutations more.
 	* tree-ssa-forwprop.c (is_combined_permutation_identity): New function.
 	(simplify_permutation): Likewise.
 	(ssa_forward_propagate_and_combine): Call it.

gcc/testsuite/
 	* gcc.dg/tree-ssa/forwprop-19.c: New testcase.
 	* gcc.dg/fold-perm.c: Likewise.

Comments

Richard Biener Aug. 20, 2012, 11:35 a.m. UTC | #1
On Sat, Aug 18, 2012 at 11:59 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> here is a new patch (passes bootstrap+regtest), which only combines
> permutations if the result is the identity. I'll file a PR about more
> general
> combinations to link to this conversation and the cost hook proposition.
>
> Ok?

Ok.

Thanks,
Richard.

>
> 2012-08-18  Marc Glisse  <marc.glisse@inria.fr>
>
>
> gcc/
>         * fold-const.c (fold_ternary_loc): Detect identity permutations.
>         Canonicalize permutations more.
>         * tree-ssa-forwprop.c (is_combined_permutation_identity): New
> function.
>
>         (simplify_permutation): Likewise.
>         (ssa_forward_propagate_and_combine): Call it.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/forwprop-19.c: New testcase.
>         * gcc.dg/fold-perm.c: Likewise.
>
> --
> Marc Glisse
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c    (revision 190500)
> +++ gcc/fold-const.c    (working copy)
> @@ -14148,58 +14148,96 @@ fold_ternary_loc (location_t loc, enum t
>         return fold_build2_loc (loc, PLUS_EXPR, type,
>                                 const_binop (MULT_EXPR, arg0, arg1), arg2);
>        if (integer_zerop (arg2))
>         return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
>
>        return fold_fma (loc, type, arg0, arg1, arg2);
>
>      case VEC_PERM_EXPR:
>        if (TREE_CODE (arg2) == VECTOR_CST)
>         {
> -         unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
> +         unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask;
>           unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
>           tree t;
>           bool need_mask_canon = false;
> +         bool all_in_vec0 = true;
> +         bool all_in_vec1 = true;
> +         bool maybe_identity = true;
> +         bool single_arg = (op0 == op1);
> +         bool changed = false;
>
> +         mask = single_arg ? (nelts - 1) : (2 * nelts - 1);
>           gcc_assert (nelts == VECTOR_CST_NELTS (arg2));
>           for (i = 0; i < nelts; i++)
>             {
>               tree val = VECTOR_CST_ELT (arg2, i);
>               if (TREE_CODE (val) != INTEGER_CST)
>                 return NULL_TREE;
>
> -             sel[i] = TREE_INT_CST_LOW (val) & (2 * nelts - 1);
> +             sel[i] = TREE_INT_CST_LOW (val) & mask;
>               if (TREE_INT_CST_HIGH (val)
>                   || ((unsigned HOST_WIDE_INT)
>                       TREE_INT_CST_LOW (val) != sel[i]))
>                 need_mask_canon = true;
> +
> +             if (sel[i] < nelts)
> +               all_in_vec1 = false;
> +             else
> +               all_in_vec0 = false;
> +
> +             if ((sel[i] & (nelts-1)) != i)
> +               maybe_identity = false;
> +           }
> +
> +         if (maybe_identity)
> +           {
> +             if (all_in_vec0)
> +               return op0;
> +             if (all_in_vec1)
> +               return op1;
>             }
>
>           if ((TREE_CODE (arg0) == VECTOR_CST
>                || TREE_CODE (arg0) == CONSTRUCTOR)
>               && (TREE_CODE (arg1) == VECTOR_CST
>                   || TREE_CODE (arg1) == CONSTRUCTOR))
>             {
>               t = fold_vec_perm (type, arg0, arg1, sel);
>               if (t != NULL_TREE)
>                 return t;
>             }
>
> +         if (all_in_vec0)
> +           op1 = op0;
> +         else if (all_in_vec1)
> +           {
> +             op0 = op1;
> +             for (i = 0; i < nelts; i++)
> +               sel[i] -= nelts;
> +             need_mask_canon = true;
> +           }
> +
> +         if (op0 == op1 && !single_arg)
> +           changed = true;
> +
>           if (need_mask_canon && arg2 == op2)
>             {
>               tree *tsel = XALLOCAVEC (tree, nelts);
>               tree eltype = TREE_TYPE (TREE_TYPE (arg2));
>               for (i = 0; i < nelts; i++)
>                 tsel[i] = build_int_cst (eltype, sel[i]);
> -             t = build_vector (TREE_TYPE (arg2), tsel);
> -             return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t);
> +             op2 = build_vector (TREE_TYPE (arg2), tsel);
> +             changed = true;
>             }
> +
> +         if (changed)
> +           return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
>         }
>        return NULL_TREE;
>
>      default:
>        return NULL_TREE;
>      } /* switch (code) */
>  }
>
>  /* Perform constant folding and related simplification of EXPR.
>     The related simplifications include x*1 => x, x*0 => 0, etc.,
> Index: gcc/tree-ssa-forwprop.c
> ===================================================================
> --- gcc/tree-ssa-forwprop.c     (revision 190500)
> +++ gcc/tree-ssa-forwprop.c     (working copy)
> @@ -2570,20 +2570,109 @@ combine_conversions (gimple_stmt_iterato
>               gimple_assign_set_rhs_code (stmt, CONVERT_EXPR);
>               update_stmt (stmt);
>               return remove_prop_source_from_use (op0) ? 2 : 1;
>             }
>         }
>      }
>
>    return 0;
>  }
>
> +/* Determine whether applying the 2 permutations (mask1 then mask2)
> +   gives back one of the input.  */
> +
> +static int
> +is_combined_permutation_identity (tree mask1, tree mask2)
> +{
> +  tree mask;
> +  unsigned int nelts, i, j;
> +  bool maybe_identity1 = true;
> +  bool maybe_identity2 = true;
> +
> +  gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST
> +                      && TREE_CODE (mask2) == VECTOR_CST);
> +  mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1,
> mask2);
> +  gcc_assert (TREE_CODE (mask) == VECTOR_CST);
> +
> +  nelts = VECTOR_CST_NELTS (mask);
> +  for (i = 0; i < nelts; i++)
> +    {
> +      tree val = VECTOR_CST_ELT (mask, i);
> +      gcc_assert (TREE_CODE (val) == INTEGER_CST);
> +      j = TREE_INT_CST_LOW (val) & (2 * nelts - 1);
> +      if (j == i)
> +       maybe_identity2 = false;
> +      else if (j == i + nelts)
> +       maybe_identity1 = false;
> +      else
> +       return 0;
> +    }
> +  return maybe_identity1 ? 1 : maybe_identity2 ? 2 : 0;
> +}
> +
> +/* Combine two shuffles in a row.  Returns 1 if there were any changes
> +   made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
> +
> +static int
> +simplify_permutation (gimple_stmt_iterator *gsi)
> +{
> +  gimple stmt = gsi_stmt (*gsi);
> +  gimple def_stmt;
> +  tree op0, op1, op2, op3;
> +  enum tree_code code = gimple_assign_rhs_code (stmt);
> +  enum tree_code code2;
> +
> +  gcc_checking_assert (code == VEC_PERM_EXPR);
> +
> +  op0 = gimple_assign_rhs1 (stmt);
> +  op1 = gimple_assign_rhs2 (stmt);
> +  op2 = gimple_assign_rhs3 (stmt);
> +
> +  if (TREE_CODE (op0) != SSA_NAME)
> +    return 0;
> +
> +  if (TREE_CODE (op2) != VECTOR_CST)
> +    return 0;
> +
> +  if (op0 != op1)
> +    return 0;
> +
> +  def_stmt = SSA_NAME_DEF_STMT (op0);
> +  if (!def_stmt || !is_gimple_assign (def_stmt)
> +      || !can_propagate_from (def_stmt))
> +    return 0;
> +
> +  code2 = gimple_assign_rhs_code (def_stmt);
> +
> +  /* Two consecutive shuffles.  */
> +  if (code2 == VEC_PERM_EXPR)
> +    {
> +      tree orig;
> +      int ident;
> +      op3 = gimple_assign_rhs3 (def_stmt);
> +      if (TREE_CODE (op3) != VECTOR_CST)
> +       return 0;
> +      ident = is_combined_permutation_identity (op3, op2);
> +      if (!ident)
> +       return 0;
> +      orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt)
> +                         : gimple_assign_rhs2 (def_stmt);
> +      gimple_assign_set_rhs1 (stmt, unshare_expr (orig));
> +      gimple_assign_set_rhs_code (stmt, TREE_CODE (orig));
> +      gimple_set_num_ops (stmt, 2);
> +      update_stmt (stmt);
> +      return remove_prop_source_from_use (op0) ? 2 : 1;
> +    }
> +
> +  return false;
> +}
> +
>  /* Main entry point for the forward propagation and statement combine
>     optimizer.  */
>
>  static unsigned int
>  ssa_forward_propagate_and_combine (void)
>  {
>    basic_block bb;
>    unsigned int todoflags = 0;
>
>    cfg_changed = false;
> @@ -2732,20 +2821,27 @@ ssa_forward_propagate_and_combine (void)
>                   changed = associate_pointerplus (&gsi);
>                 else if (CONVERT_EXPR_CODE_P (code)
>                          || code == FLOAT_EXPR
>                          || code == FIX_TRUNC_EXPR)
>                   {
>                     int did_something = combine_conversions (&gsi);
>                     if (did_something == 2)
>                       cfg_changed = true;
>                     changed = did_something != 0;
>                   }
> +               else if (code == VEC_PERM_EXPR)
> +                 {
> +                   int did_something = simplify_permutation (&gsi);
> +                   if (did_something == 2)
> +                     cfg_changed = true;
> +                   changed = did_something != 0;
> +                 }
>                 break;
>               }
>
>             case GIMPLE_SWITCH:
>               changed = simplify_gimple_switch (stmt);
>               break;
>
>             case GIMPLE_COND:
>               {
>                 int did_something;
> Index: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c (revision 0)
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-forwprop2" } */
> +
> +typedef int vec __attribute__((vector_size (4 * sizeof (int))));
> +void f (vec *x1, vec *x2)
> +{
> +  vec m = { 1, 2, 3, 0 };
> +  vec n = { 3, 0, 1, 2 };
> +  vec y = __builtin_shuffle (*x1, *x2, n);
> +  vec z = __builtin_shuffle (y, m);
> +  *x1 = z;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "forwprop2" } } */
> +/* { dg-final { cleanup-tree-dump "forwprop2" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
> Index: gcc/testsuite/gcc.dg/fold-perm.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/fold-perm.c    (revision 0)
> +++ gcc/testsuite/gcc.dg/fold-perm.c    (revision 0)
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-ccp1" } */
> +
> +typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> +
> +void fun (veci *f, veci *g, veci *h)
> +{
> +  veci m = { 7, 7, 4, 6 };
> +  veci n = { 0, 1, 2, 3 };
> +  veci p = { 1, 1, 7, 6 };
> +  *h = __builtin_shuffle (*h, *h, p);
> +  *g = __builtin_shuffle (*f, *g, m);
> +  *f = __builtin_shuffle (*f, *g, n);
> +}
> +
> +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 3, 3, 0, 2 }" "ccp1" } }
> */
> +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 1, 1, 3, 2 }" "ccp1" } }
> */
> +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "ccp1" } } */
> +/* { dg-final { cleanup-tree-dump "ccp1" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/fold-perm.c
> ___________________________________________________________________
> Added: svn:eol-style
>    + native
> Added: svn:keywords
>    + Author Date Id Revision URL
>
>
diff mbox

Patch

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 190500)
+++ gcc/fold-const.c	(working copy)
@@ -14148,58 +14148,96 @@  fold_ternary_loc (location_t loc, enum t
 	return fold_build2_loc (loc, PLUS_EXPR, type,
 				const_binop (MULT_EXPR, arg0, arg1), arg2);
       if (integer_zerop (arg2))
 	return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
 
       return fold_fma (loc, type, arg0, arg1, arg2);
 
     case VEC_PERM_EXPR:
       if (TREE_CODE (arg2) == VECTOR_CST)
 	{
-	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask;
 	  unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
 	  tree t;
 	  bool need_mask_canon = false;
+	  bool all_in_vec0 = true;
+	  bool all_in_vec1 = true;
+	  bool maybe_identity = true;
+	  bool single_arg = (op0 == op1);
+	  bool changed = false;
 
+	  mask = single_arg ? (nelts - 1) : (2 * nelts - 1);
 	  gcc_assert (nelts == VECTOR_CST_NELTS (arg2));
 	  for (i = 0; i < nelts; i++)
 	    {
 	      tree val = VECTOR_CST_ELT (arg2, i);
 	      if (TREE_CODE (val) != INTEGER_CST)
 		return NULL_TREE;
 
-	      sel[i] = TREE_INT_CST_LOW (val) & (2 * nelts - 1);
+	      sel[i] = TREE_INT_CST_LOW (val) & mask;
 	      if (TREE_INT_CST_HIGH (val)
 		  || ((unsigned HOST_WIDE_INT)
 		      TREE_INT_CST_LOW (val) != sel[i]))
 		need_mask_canon = true;
+
+	      if (sel[i] < nelts)
+		all_in_vec1 = false;
+	      else
+		all_in_vec0 = false;
+
+	      if ((sel[i] & (nelts-1)) != i)
+		maybe_identity = false;
+	    }
+
+	  if (maybe_identity)
+	    {
+	      if (all_in_vec0)
+		return op0;
+	      if (all_in_vec1)
+		return op1;
 	    }
 
 	  if ((TREE_CODE (arg0) == VECTOR_CST
 	       || TREE_CODE (arg0) == CONSTRUCTOR)
 	      && (TREE_CODE (arg1) == VECTOR_CST
 		  || TREE_CODE (arg1) == CONSTRUCTOR))
 	    {
 	      t = fold_vec_perm (type, arg0, arg1, sel);
 	      if (t != NULL_TREE)
 		return t;
 	    }
 
+	  if (all_in_vec0)
+	    op1 = op0;
+	  else if (all_in_vec1)
+	    {
+	      op0 = op1;
+	      for (i = 0; i < nelts; i++)
+		sel[i] -= nelts;
+	      need_mask_canon = true;
+	    }
+
+	  if (op0 == op1 && !single_arg)
+	    changed = true;
+
 	  if (need_mask_canon && arg2 == op2)
 	    {
 	      tree *tsel = XALLOCAVEC (tree, nelts);
 	      tree eltype = TREE_TYPE (TREE_TYPE (arg2));
 	      for (i = 0; i < nelts; i++)
 		tsel[i] = build_int_cst (eltype, sel[i]);
-	      t = build_vector (TREE_TYPE (arg2), tsel);
-	      return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t);
+	      op2 = build_vector (TREE_TYPE (arg2), tsel);
+	      changed = true;
 	    }
+
+	  if (changed)
+	    return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
 	}
       return NULL_TREE;
 
     default:
       return NULL_TREE;
     } /* switch (code) */
 }
 
 /* Perform constant folding and related simplification of EXPR.
    The related simplifications include x*1 => x, x*0 => 0, etc.,
Index: gcc/tree-ssa-forwprop.c
===================================================================
--- gcc/tree-ssa-forwprop.c	(revision 190500)
+++ gcc/tree-ssa-forwprop.c	(working copy)
@@ -2570,20 +2570,109 @@  combine_conversions (gimple_stmt_iterato
 	      gimple_assign_set_rhs_code (stmt, CONVERT_EXPR);
 	      update_stmt (stmt);
 	      return remove_prop_source_from_use (op0) ? 2 : 1;
 	    }
 	}
     }
 
   return 0;
 }
 
+/* Determine whether applying the 2 permutations (mask1 then mask2)
+   gives back one of the input.  */
+
+static int
+is_combined_permutation_identity (tree mask1, tree mask2)
+{
+  tree mask;
+  unsigned int nelts, i, j;
+  bool maybe_identity1 = true;
+  bool maybe_identity2 = true;
+
+  gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST
+		       && TREE_CODE (mask2) == VECTOR_CST);
+  mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2);
+  gcc_assert (TREE_CODE (mask) == VECTOR_CST);
+
+  nelts = VECTOR_CST_NELTS (mask);
+  for (i = 0; i < nelts; i++)
+    {
+      tree val = VECTOR_CST_ELT (mask, i);
+      gcc_assert (TREE_CODE (val) == INTEGER_CST);
+      j = TREE_INT_CST_LOW (val) & (2 * nelts - 1);
+      if (j == i)
+	maybe_identity2 = false;
+      else if (j == i + nelts)
+	maybe_identity1 = false;
+      else
+	return 0;
+    }
+  return maybe_identity1 ? 1 : maybe_identity2 ? 2 : 0;
+}
+
+/* Combine two shuffles in a row.  Returns 1 if there were any changes
+   made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
+ 
+static int
+simplify_permutation (gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  gimple def_stmt;
+  tree op0, op1, op2, op3;
+  enum tree_code code = gimple_assign_rhs_code (stmt);
+  enum tree_code code2;
+
+  gcc_checking_assert (code == VEC_PERM_EXPR);
+
+  op0 = gimple_assign_rhs1 (stmt);
+  op1 = gimple_assign_rhs2 (stmt);
+  op2 = gimple_assign_rhs3 (stmt);
+
+  if (TREE_CODE (op0) != SSA_NAME)
+    return 0;
+
+  if (TREE_CODE (op2) != VECTOR_CST)
+    return 0;
+
+  if (op0 != op1)
+    return 0;
+
+  def_stmt = SSA_NAME_DEF_STMT (op0);
+  if (!def_stmt || !is_gimple_assign (def_stmt)
+      || !can_propagate_from (def_stmt))
+    return 0;
+
+  code2 = gimple_assign_rhs_code (def_stmt);
+
+  /* Two consecutive shuffles.  */
+  if (code2 == VEC_PERM_EXPR)
+    {
+      tree orig;
+      int ident;
+      op3 = gimple_assign_rhs3 (def_stmt);
+      if (TREE_CODE (op3) != VECTOR_CST)
+	return 0;
+      ident = is_combined_permutation_identity (op3, op2);
+      if (!ident)
+	return 0;
+      orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt)
+			  : gimple_assign_rhs2 (def_stmt);
+      gimple_assign_set_rhs1 (stmt, unshare_expr (orig));
+      gimple_assign_set_rhs_code (stmt, TREE_CODE (orig));
+      gimple_set_num_ops (stmt, 2);
+      update_stmt (stmt);
+      return remove_prop_source_from_use (op0) ? 2 : 1;
+    }
+
+  return false;
+}
+
 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
 static unsigned int
 ssa_forward_propagate_and_combine (void)
 {
   basic_block bb;
   unsigned int todoflags = 0;
 
   cfg_changed = false;
@@ -2732,20 +2821,27 @@  ssa_forward_propagate_and_combine (void)
 		  changed = associate_pointerplus (&gsi);
 		else if (CONVERT_EXPR_CODE_P (code)
 			 || code == FLOAT_EXPR
 			 || code == FIX_TRUNC_EXPR)
 		  {
 		    int did_something = combine_conversions (&gsi);
 		    if (did_something == 2)
 		      cfg_changed = true;
 		    changed = did_something != 0;
 		  }
+		else if (code == VEC_PERM_EXPR)
+		  {
+		    int did_something = simplify_permutation (&gsi);
+		    if (did_something == 2)
+		      cfg_changed = true;
+		    changed = did_something != 0;
+		  }
 		break;
 	      }
 
 	    case GIMPLE_SWITCH:
 	      changed = simplify_gimple_switch (stmt);
 	      break;
 
 	    case GIMPLE_COND:
 	      {
 		int did_something;
Index: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c	(revision 0)
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop2" } */
+
+typedef int vec __attribute__((vector_size (4 * sizeof (int))));
+void f (vec *x1, vec *x2)
+{
+  vec m = { 1, 2, 3, 0 };
+  vec n = { 3, 0, 1, 2 };
+  vec y = __builtin_shuffle (*x1, *x2, n);
+  vec z = __builtin_shuffle (y, m);
+  *x1 = z;
+}
+
+/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "forwprop2" } } */
+/* { dg-final { cleanup-tree-dump "forwprop2" } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: gcc/testsuite/gcc.dg/fold-perm.c
===================================================================
--- gcc/testsuite/gcc.dg/fold-perm.c	(revision 0)
+++ gcc/testsuite/gcc.dg/fold-perm.c	(revision 0)
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
+
+void fun (veci *f, veci *g, veci *h)
+{
+  veci m = { 7, 7, 4, 6 };
+  veci n = { 0, 1, 2, 3 };
+  veci p = { 1, 1, 7, 6 };
+  *h = __builtin_shuffle (*h, *h, p);
+  *g = __builtin_shuffle (*f, *g, m);
+  *f = __builtin_shuffle (*f, *g, n);
+}
+
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 3, 3, 0, 2 }" "ccp1" } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 1, 1, 3, 2 }" "ccp1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "ccp1" } } */
+/* { dg-final { cleanup-tree-dump "ccp1" } } */