diff mbox

[5/7] Move the fix for PR65518

Message ID 87poriltqh.fsf@e105548-lin.cambridge.arm.com
State New
Headers show

Commit Message

Richard Sandiford June 15, 2016, 8:52 a.m. UTC
This patch moves the fix for PR65518 to the code that checks whether
load-and-permute operations are supported.  If the group size is
greater than the vectorisation factor, it would still be possible
to fall back to elementwise loads (as for strided groups) rather
than fail vectorisation entirely.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
	* tree-vectorizer.h (vect_grouped_load_supported): Add a
	single_element_p parameter.
	* tree-vect-data-refs.c (vect_grouped_load_supported): Likewise.
	Check the PR65518 case here rather than in vectorizable_load.
	* tree-vect-loop.c (vect_analyze_loop_2): Update call accordingly.
	* tree-vect-stmts.c (vectorizable_load): Likewise.

Comments

Richard Biener June 15, 2016, 1:31 p.m. UTC | #1
On Wed, Jun 15, 2016 at 10:52 AM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> This patch moves the fix for PR65518 to the code that checks whether
> load-and-permute operations are supported.   If the group size is
> greater than the vectorisation factor, it would still be possible
> to fall back to elementwise loads (as for strided groups) rather
> than fail vectorisation entirely.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Ok.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> gcc/
>         * tree-vectorizer.h (vect_grouped_load_supported): Add a
>         single_element_p parameter.
>         * tree-vect-data-refs.c (vect_grouped_load_supported): Likewise.
>         Check the PR65518 case here rather than in vectorizable_load.
>         * tree-vect-loop.c (vect_analyze_loop_2): Update call accordingly.
>         * tree-vect-stmts.c (vectorizable_load): Likewise.
>
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h
> +++ gcc/tree-vectorizer.h
> @@ -1069,7 +1069,7 @@ extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, gimple *,
>  extern tree vect_create_destination_var (tree, tree);
>  extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
>  extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT);
> -extern bool vect_grouped_load_supported (tree, unsigned HOST_WIDE_INT);
> +extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
>  extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT);
>  extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *,
>                                      gimple_stmt_iterator *, vec<tree> *);
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c
> +++ gcc/tree-vect-data-refs.c
> @@ -5131,14 +5131,31 @@ vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
>
>  /* Function vect_grouped_load_supported.
>
> -   Returns TRUE if even and odd permutations are supported,
> -   and FALSE otherwise.  */
> +   COUNT is the size of the load group (the number of statements plus the
> +   number of gaps).  SINGLE_ELEMENT_P is true if there is actually
> +   only one statement, with a gap of COUNT - 1.
> +
> +   Returns true if a suitable permute exists.  */
>
>  bool
> -vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
> +vect_grouped_load_supported (tree vectype, bool single_element_p,
> +                            unsigned HOST_WIDE_INT count)
>  {
>    machine_mode mode = TYPE_MODE (vectype);
>
> +  /* If this is single-element interleaving with an element distance
> +     that leaves unused vector loads around punt - we at least create
> +     very sub-optimal code in that case (and blow up memory,
> +     see PR65518).  */
> +  if (single_element_p && count > TYPE_VECTOR_SUBPARTS (vectype))
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "single-element interleaving not supported "
> +                        "for not adjacent vector loads\n");
> +      return false;
> +    }
> +
>    /* vect_permute_load_chain requires the group size to be equal to 3 or
>       be a power of two.  */
>    if (count != 3 && exact_log2 (count) == -1)
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c
> +++ gcc/tree-vect-loop.c
> @@ -2148,10 +2148,12 @@ again:
>         {
>           vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
>           vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
> +         bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo);
>           size = STMT_VINFO_GROUP_SIZE (vinfo);
>           vectype = STMT_VINFO_VECTYPE (vinfo);
>           if (! vect_load_lanes_supported (vectype, size)
> -             && ! vect_grouped_load_supported (vectype, size))
> +             && ! vect_grouped_load_supported (vectype, single_element_p,
> +                                               size))
>             return false;
>         }
>      }
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c
> +++ gcc/tree-vect-stmts.c
> @@ -6298,31 +6298,20 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>
>        first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>        group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
> +      bool single_element_p = (first_stmt == stmt
> +                              && !GROUP_NEXT_ELEMENT (stmt_info));
>
>        if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
>         {
>           if (vect_load_lanes_supported (vectype, group_size))
>             load_lanes_p = true;
> -         else if (!vect_grouped_load_supported (vectype, group_size))
> +         else if (!vect_grouped_load_supported (vectype, single_element_p,
> +                                                group_size))
>             return false;
>         }
>
> -      /* If this is single-element interleaving with an element distance
> -         that leaves unused vector loads around punt - we at least create
> -        very sub-optimal code in that case (and blow up memory,
> -        see PR65518).  */
> -      if (first_stmt == stmt
> -         && !GROUP_NEXT_ELEMENT (stmt_info))
> +      if (single_element_p)
>         {
> -         if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                "single-element interleaving not supported "
> -                                "for not adjacent vector loads\n");
> -             return false;
> -           }
> -
>           /* Single-element interleaving requires peeling for gaps.  */
>           gcc_assert (GROUP_GAP (stmt_info));
>         }
diff mbox

Patch

Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h
+++ gcc/tree-vectorizer.h
@@ -1069,7 +1069,7 @@  extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, gimple *,
 extern tree vect_create_destination_var (tree, tree);
 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
 extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT);
-extern bool vect_grouped_load_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
 extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT);
 extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *,
                                     gimple_stmt_iterator *, vec<tree> *);
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c
+++ gcc/tree-vect-data-refs.c
@@ -5131,14 +5131,31 @@  vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
 
 /* Function vect_grouped_load_supported.
 
-   Returns TRUE if even and odd permutations are supported,
-   and FALSE otherwise.  */
+   COUNT is the size of the load group (the number of statements plus the
+   number of gaps).  SINGLE_ELEMENT_P is true if there is actually
+   only one statement, with a gap of COUNT - 1.
+
+   Returns true if a suitable permute exists.  */
 
 bool
-vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
+vect_grouped_load_supported (tree vectype, bool single_element_p,
+			     unsigned HOST_WIDE_INT count)
 {
   machine_mode mode = TYPE_MODE (vectype);
 
+  /* If this is single-element interleaving with an element distance
+     that leaves unused vector loads around punt - we at least create
+     very sub-optimal code in that case (and blow up memory,
+     see PR65518).  */
+  if (single_element_p && count > TYPE_VECTOR_SUBPARTS (vectype))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "single-element interleaving not supported "
+			 "for not adjacent vector loads\n");
+      return false;
+    }
+
   /* vect_permute_load_chain requires the group size to be equal to 3 or
      be a power of two.  */
   if (count != 3 && exact_log2 (count) == -1)
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c
+++ gcc/tree-vect-loop.c
@@ -2148,10 +2148,12 @@  again:
 	{
 	  vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
 	  vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
+	  bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo);
 	  size = STMT_VINFO_GROUP_SIZE (vinfo);
 	  vectype = STMT_VINFO_VECTYPE (vinfo);
 	  if (! vect_load_lanes_supported (vectype, size)
-	      && ! vect_grouped_load_supported (vectype, size))
+	      && ! vect_grouped_load_supported (vectype, single_element_p,
+						size))
 	    return false;
 	}
     }
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c
+++ gcc/tree-vect-stmts.c
@@ -6298,31 +6298,20 @@  vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+      bool single_element_p = (first_stmt == stmt
+			       && !GROUP_NEXT_ELEMENT (stmt_info));
 
       if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
 	{
 	  if (vect_load_lanes_supported (vectype, group_size))
 	    load_lanes_p = true;
-	  else if (!vect_grouped_load_supported (vectype, group_size))
+	  else if (!vect_grouped_load_supported (vectype, single_element_p,
+						 group_size))
 	    return false;
 	}
 
-      /* If this is single-element interleaving with an element distance
-         that leaves unused vector loads around punt - we at least create
-	 very sub-optimal code in that case (and blow up memory,
-	 see PR65518).  */
-      if (first_stmt == stmt
-	  && !GROUP_NEXT_ELEMENT (stmt_info))
+      if (single_element_p)
 	{
-	  if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "single-element interleaving not supported "
-				 "for not adjacent vector loads\n");
-	      return false;
-	    }
-
 	  /* Single-element interleaving requires peeling for gaps.  */
 	  gcc_assert (GROUP_GAP (stmt_info));
 	}