Message ID | 87poriltqh.fsf@e105548-lin.cambridge.arm.com |
---|---|
State | New |
Headers | show |
On Wed, Jun 15, 2016 at 10:52 AM, Richard Sandiford <richard.sandiford@arm.com> wrote: > This patch moves the fix for PR65518 to the code that checks whether > load-and-permute operations are supported. If the group size is > greater than the vectorisation factor, it would still be possible > to fall back to elementwise loads (as for strided groups) rather > than fail vectorisation entirely. > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Ok. Thanks, Richard. > Thanks, > Richard > > > gcc/ > * tree-vectorizer.h (vect_grouped_load_supported): Add a > single_element_p parameter. > * tree-vect-data-refs.c (vect_grouped_load_supported): Likewise. > Check the PR65518 case here rather than in vectorizable_load. > * tree-vect-loop.c (vect_analyze_loop_2): Update call accordingly. > * tree-vect-stmts.c (vectorizable_load): Likewise. > > Index: gcc/tree-vectorizer.h > =================================================================== > --- gcc/tree-vectorizer.h > +++ gcc/tree-vectorizer.h > @@ -1069,7 +1069,7 @@ extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, gimple *, > extern tree vect_create_destination_var (tree, tree); > extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); > extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT); > -extern bool vect_grouped_load_supported (tree, unsigned HOST_WIDE_INT); > +extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); > extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT); > extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *, > gimple_stmt_iterator *, vec<tree> *); > Index: gcc/tree-vect-data-refs.c > =================================================================== > --- gcc/tree-vect-data-refs.c > +++ gcc/tree-vect-data-refs.c > @@ -5131,14 +5131,31 @@ vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi, > > /* Function vect_grouped_load_supported. 
> > - Returns TRUE if even and odd permutations are supported, > - and FALSE otherwise. */ > + COUNT is the size of the load group (the number of statements plus the > + number of gaps). SINGLE_ELEMENT_P is true if there is actually > + only one statement, with a gap of COUNT - 1. > + > + Returns true if a suitable permute exists. */ > > bool > -vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count) > +vect_grouped_load_supported (tree vectype, bool single_element_p, > + unsigned HOST_WIDE_INT count) > { > machine_mode mode = TYPE_MODE (vectype); > > + /* If this is single-element interleaving with an element distance > + that leaves unused vector loads around punt - we at least create > + very sub-optimal code in that case (and blow up memory, > + see PR65518). */ > + if (single_element_p && count > TYPE_VECTOR_SUBPARTS (vectype)) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "single-element interleaving not supported " > + "for not adjacent vector loads\n"); > + return false; > + } > + > /* vect_permute_load_chain requires the group size to be equal to 3 or > be a power of two. */ > if (count != 3 && exact_log2 (count) == -1) > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c > +++ gcc/tree-vect-loop.c > @@ -2148,10 +2148,12 @@ again: > { > vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); > vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo)); > + bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo); > size = STMT_VINFO_GROUP_SIZE (vinfo); > vectype = STMT_VINFO_VECTYPE (vinfo); > if (! vect_load_lanes_supported (vectype, size) > - && ! vect_grouped_load_supported (vectype, size)) > + && ! 
vect_grouped_load_supported (vectype, single_element_p, > + size)) > return false; > } > } > Index: gcc/tree-vect-stmts.c > =================================================================== > --- gcc/tree-vect-stmts.c > +++ gcc/tree-vect-stmts.c > @@ -6298,31 +6298,20 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, > > first_stmt = GROUP_FIRST_ELEMENT (stmt_info); > group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); > + bool single_element_p = (first_stmt == stmt > + && !GROUP_NEXT_ELEMENT (stmt_info)); > > if (!slp && !STMT_VINFO_STRIDED_P (stmt_info)) > { > if (vect_load_lanes_supported (vectype, group_size)) > load_lanes_p = true; > - else if (!vect_grouped_load_supported (vectype, group_size)) > + else if (!vect_grouped_load_supported (vectype, single_element_p, > + group_size)) > return false; > } > > - /* If this is single-element interleaving with an element distance > - that leaves unused vector loads around punt - we at least create > - very sub-optimal code in that case (and blow up memory, > - see PR65518). */ > - if (first_stmt == stmt > - && !GROUP_NEXT_ELEMENT (stmt_info)) > + if (single_element_p) > { > - if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype)) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "single-element interleaving not supported " > - "for not adjacent vector loads\n"); > - return false; > - } > - > /* Single-element interleaving requires peeling for gaps. */ > gcc_assert (GROUP_GAP (stmt_info)); > }
Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h +++ gcc/tree-vectorizer.h @@ -1069,7 +1069,7 @@ extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, gimple *, extern tree vect_create_destination_var (tree, tree); extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT); -extern bool vect_grouped_load_supported (tree, unsigned HOST_WIDE_INT); +extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT); extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *, gimple_stmt_iterator *, vec<tree> *); Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c +++ gcc/tree-vect-data-refs.c @@ -5131,14 +5131,31 @@ vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi, /* Function vect_grouped_load_supported. - Returns TRUE if even and odd permutations are supported, - and FALSE otherwise. */ + COUNT is the size of the load group (the number of statements plus the + number of gaps). SINGLE_ELEMENT_P is true if there is actually + only one statement, with a gap of COUNT - 1. + + Returns true if a suitable permute exists. */ bool -vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count) +vect_grouped_load_supported (tree vectype, bool single_element_p, + unsigned HOST_WIDE_INT count) { machine_mode mode = TYPE_MODE (vectype); + /* If this is single-element interleaving with an element distance + that leaves unused vector loads around punt - we at least create + very sub-optimal code in that case (and blow up memory, + see PR65518). 
*/ + if (single_element_p && count > TYPE_VECTOR_SUBPARTS (vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "single-element interleaving not supported " + "for not adjacent vector loads\n"); + return false; + } + /* vect_permute_load_chain requires the group size to be equal to 3 or be a power of two. */ if (count != 3 && exact_log2 (count) == -1) Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c +++ gcc/tree-vect-loop.c @@ -2148,10 +2148,12 @@ again: { vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo)); + bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo); size = STMT_VINFO_GROUP_SIZE (vinfo); vectype = STMT_VINFO_VECTYPE (vinfo); if (! vect_load_lanes_supported (vectype, size) - && ! vect_grouped_load_supported (vectype, size)) + && ! vect_grouped_load_supported (vectype, single_element_p, + size)) return false; } } Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c +++ gcc/tree-vect-stmts.c @@ -6298,31 +6298,20 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, first_stmt = GROUP_FIRST_ELEMENT (stmt_info); group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); + bool single_element_p = (first_stmt == stmt + && !GROUP_NEXT_ELEMENT (stmt_info)); if (!slp && !STMT_VINFO_STRIDED_P (stmt_info)) { if (vect_load_lanes_supported (vectype, group_size)) load_lanes_p = true; - else if (!vect_grouped_load_supported (vectype, group_size)) + else if (!vect_grouped_load_supported (vectype, single_element_p, + group_size)) return false; } - /* If this is single-element interleaving with an element distance - that leaves unused vector loads around punt - we at least create - very sub-optimal code in that case (and blow up memory, - see PR65518). 
*/ - if (first_stmt == stmt - && !GROUP_NEXT_ELEMENT (stmt_info)) + if (single_element_p) { - if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "single-element interleaving not supported " - "for not adjacent vector loads\n"); - return false; - } - /* Single-element interleaving requires peeling for gaps. */ gcc_assert (GROUP_GAP (stmt_info)); }