diff mbox series

veclower: improve selection of vector mode when lowering [PR 112787]

Message ID 3350dc3b-3471-453b-b631-810939609ba9@arm.com
State New
Headers show
Series veclower: improve selection of vector mode when lowering [PR 112787] | expand

Commit Message

Andre Vieira (lists) Dec. 6, 2023, 6:49 p.m. UTC
Hi,

This patch addresses the issue reported in PR target/112787 by improving the
compute type selection.  We do this by not considering types with more 
elements
than the type we are lowering since we'd reject such types anyway.

gcc/ChangeLog:

	PR target/112787
	* tree-vect-generic (type_for_widest_vector_mode): Add a parameter to
	control maximum amount of elements in resulting vector mode.
	(get_compute_type): Restrict vector_compute_type to a mode no wider
	than the original compute type.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/pr112787.c: New test.

Bootstrapped and regression tested on aarch64-unknown-linux-gnu and 
x86_64-pc-linux-gnu.

Is this OK for trunk?

Kind regards,
Andre Vieira

Comments

Richard Biener Dec. 7, 2023, 7:45 a.m. UTC | #1
On Wed, 6 Dec 2023, Andre Vieira (lists) wrote:

> Hi,
> 
> This patch addresses the issue reported in PR target/112787 by improving the
> compute type selection.  We do this by not considering types with more
> elements
> than the type we are lowering since we'd reject such types anyway.
> 
> gcc/ChangeLog:
> 
> 	PR target/112787
> 	* tree-vect-generic (type_for_widest_vector_mode): Add a parameter to
> 	control maximum amount of elements in resulting vector mode.
> 	(get_compute_type): Restrict vector_compute_type to a mode no wider
> 	than the original compute type.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/aarch64/pr112787.c: New test.
> 
> Bootstrapped and regression tested on aarch64-unknown-linux-gnu and
> x86_64-pc-linux-gnu.
> 
> Is this OK for trunk?

@@ -1347,7 +1347,7 @@ optimize_vector_constructor (gimple_stmt_iterator 
*gsi)
    TYPE, or NULL_TREE if none is found.  */

Can you improve the function comment?  It also doesn't mention OP ...

 static tree
-type_for_widest_vector_mode (tree type, optab op)
+type_for_widest_vector_mode (tree type, optab op, poly_int64 max_nunits = 
0)
 {
   machine_mode inner_mode = TYPE_MODE (type);
   machine_mode best_mode = VOIDmode, mode;
@@ -1371,7 +1371,9 @@ type_for_widest_vector_mode (tree type, optab op)
   FOR_EACH_MODE_FROM (mode, mode)
     if (GET_MODE_INNER (mode) == inner_mode
        && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
-       && optab_handler (op, mode) != CODE_FOR_nothing)
+       && optab_handler (op, mode) != CODE_FOR_nothing
+       && (known_eq (max_nunits, 0)
+           || known_lt (GET_MODE_NUNITS (mode), max_nunits)))

max_nunits suggests that known_le would be appropriate instead.

I see the only other caller with similar "problems":

        }
      /* Can't use get_compute_type here, as supportable_convert_operation
         doesn't necessarily use an optab and needs two arguments.  */
      tree vec_compute_type
        = type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
      if (vec_compute_type
          && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
          && subparts_gt (arg_type, vec_compute_type))

so please do not default to 0 but adjust this one as well.  It also
seems you then can remove the subparts_gt guards on both
vec_compute_type uses.

I think the API would be cleaner if we'd pass the original vector type
we can then extract TYPE_VECTOR_SUBPARTS from, avoiding the extra arg.

No?

Thanks,
Richard.
Andre Vieira (lists) Dec. 20, 2023, 2:29 p.m. UTC | #2
Thanks, fully agree with all comments.

gcc/ChangeLog:

	PR target/112787
	* tree-vect-generic (type_for_widest_vector_mode): Change function
         to use original vector type and check widest vector mode has at 
most
         the same number of elements.
	(get_compute_type): Pass original vector type rather than the element
         type to type_for_widest_vector_mode and remove now obsolete check
         for the number of elements.

On 07/12/2023 07:45, Richard Biener wrote:
> On Wed, 6 Dec 2023, Andre Vieira (lists) wrote:
> 
>> Hi,
>>
>> This patch addresses the issue reported in PR target/112787 by improving the
>> compute type selection.  We do this by not considering types with more
>> elements
>> than the type we are lowering since we'd reject such types anyway.
>>
>> gcc/ChangeLog:
>>
>> 	PR target/112787
>> 	* tree-vect-generic (type_for_widest_vector_mode): Add a parameter to
>> 	control maximum amount of elements in resulting vector mode.
>> 	(get_compute_type): Restrict vector_compute_type to a mode no wider
>> 	than the original compute type.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	* gcc.target/aarch64/pr112787.c: New test.
>>
>> Bootstrapped and regression tested on aarch64-unknown-linux-gnu and
>> x86_64-pc-linux-gnu.
>>
>> Is this OK for trunk?
> 
> @@ -1347,7 +1347,7 @@ optimize_vector_constructor (gimple_stmt_iterator
> *gsi)
>      TYPE, or NULL_TREE if none is found.  */
> 
> Can you improve the function comment?  It also doesn't mention OP ...
> 
>   static tree
> -type_for_widest_vector_mode (tree type, optab op)
> +type_for_widest_vector_mode (tree type, optab op, poly_int64 max_nunits =
> 0)
>   {
>     machine_mode inner_mode = TYPE_MODE (type);
>     machine_mode best_mode = VOIDmode, mode;
> @@ -1371,7 +1371,9 @@ type_for_widest_vector_mode (tree type, optab op)
>     FOR_EACH_MODE_FROM (mode, mode)
>       if (GET_MODE_INNER (mode) == inner_mode
>          && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
> -       && optab_handler (op, mode) != CODE_FOR_nothing)
> +       && optab_handler (op, mode) != CODE_FOR_nothing
> +       && (known_eq (max_nunits, 0)
> +           || known_lt (GET_MODE_NUNITS (mode), max_nunits)))
> 
> max_nunits suggests that known_le would be appropriate instead.
> 
> I see the only other caller with similar "problems":
> 
>          }
>        /* Can't use get_compute_type here, as supportable_convert_operation
>           doesn't necessarily use an optab and needs two arguments.  */
>        tree vec_compute_type
>          = type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
>        if (vec_compute_type
>            && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
>            && subparts_gt (arg_type, vec_compute_type))
> 
> so please do not default to 0 but adjust this one as well.  It also
> seems you then can remove the subparts_gt guards on both
> vec_compute_type uses.
> 
> I think the API would be cleaner if we'd pass the original vector type
> we can then extract TYPE_VECTOR_SUBPARTS from, avoiding the extra arg.
> 
> No?
> 
> Thanks,
> Richard.
diff --git a/gcc/testsuite/gcc.target/aarch64/pr112787.c b/gcc/testsuite/gcc.target/aarch64/pr112787.c
new file mode 100644
index 0000000000000000000000000000000000000000..caca1bf7ef447e4489b2c134d7200a4afd16763f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr112787.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -march=armv8-a+sve -mcpu=neoverse-n2" } */
+
+typedef int __attribute__((__vector_size__ (64))) vec;
+
+vec fn (vec a, vec b)
+{
+  return a + b;
+}
+
+/* { dg-final { scan-assembler-times {add\tv[0-9]+} 4 } } */
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index a7e6cb87a5e31e3dd2a893ea5652eeebf8d5d214..c906eb3521ea01fd2bdfc89c3476d02c555cf8cc 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -1343,12 +1343,16 @@ optimize_vector_constructor (gimple_stmt_iterator *gsi)
   gsi_replace (gsi, g, false);
 }
 
-/* Return a type for the widest vector mode whose components are of type
-   TYPE, or NULL_TREE if none is found.  */
+/* Return a type for the widest vector mode with the same element type as
+   type ORIGINAL_VECTOR_TYPE, with at most the same number of elements as type
+   ORIGINAL_VECTOR_TYPE and that is supported by the target for an operation
+   with optab OP, or return NULL_TREE if none is found.  */
 
 static tree
-type_for_widest_vector_mode (tree type, optab op)
+type_for_widest_vector_mode (tree original_vector_type, optab op)
 {
+  gcc_assert (VECTOR_TYPE_P (original_vector_type));
+  tree type = TREE_TYPE (original_vector_type);
   machine_mode inner_mode = TYPE_MODE (type);
   machine_mode best_mode = VOIDmode, mode;
   poly_int64 best_nunits = 0;
@@ -1371,7 +1375,9 @@ type_for_widest_vector_mode (tree type, optab op)
   FOR_EACH_MODE_FROM (mode, mode)
     if (GET_MODE_INNER (mode) == inner_mode
 	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
-	&& optab_handler (op, mode) != CODE_FOR_nothing)
+	&& optab_handler (op, mode) != CODE_FOR_nothing
+	&& known_le (GET_MODE_NUNITS (mode),
+		     TYPE_VECTOR_SUBPARTS (original_vector_type)))
       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
 
   if (best_mode == VOIDmode)
@@ -1702,9 +1708,8 @@ get_compute_type (enum tree_code code, optab op, tree type)
 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
     {
       tree vector_compute_type
-	= type_for_widest_vector_mode (TREE_TYPE (type), op);
+	= type_for_widest_vector_mode (type, op);
       if (vector_compute_type != NULL_TREE
-	  && subparts_gt (compute_type, vector_compute_type)
 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
 	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
 	      != CODE_FOR_nothing))
@@ -1877,10 +1882,9 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
       /* Can't use get_compute_type here, as supportable_convert_operation
 	 doesn't necessarily use an optab and needs two arguments.  */
       tree vec_compute_type
-	= type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
+	= type_for_widest_vector_mode (arg_type, mov_optab);
       if (vec_compute_type
-	  && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
-	  && subparts_gt (arg_type, vec_compute_type))
+	  && VECTOR_MODE_P (TYPE_MODE (vec_compute_type)))
 	{
 	  unsigned HOST_WIDE_INT nelts
 	    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
Richard Biener Dec. 20, 2023, 2:30 p.m. UTC | #3
On Wed, 20 Dec 2023, Andre Vieira (lists) wrote:

> Thanks, fully agree with all comments.
> 
> gcc/ChangeLog:
> 
> 	PR target/112787
> 	* tree-vect-generic (type_for_widest_vector_mode): Change function
>         to use original vector type and check widest vector mode has at most
> 	         the same number of elements.
> 	(get_compute_type): Pass original vector type rather than the element
>         type to type_for_widest_vector_mode and remove now obsolete check
>         for the number of elements.

OK.

Richard.

> On 07/12/2023 07:45, Richard Biener wrote:
> > On Wed, 6 Dec 2023, Andre Vieira (lists) wrote:
> > 
> >> Hi,
> >>
> >> This patch addresses the issue reported in PR target/112787 by improving
> >> the
> >> compute type selection.  We do this by not considering types with more
> >> elements
> >> than the type we are lowering since we'd reject such types anyway.
> >>
> >> gcc/ChangeLog:
> >>
> >>  PR target/112787
> >>  * tree-vect-generic (type_for_widest_vector_mode): Add a parameter to
> >>  control maximum amount of elements in resulting vector mode.
> >>  (get_compute_type): Restrict vector_compute_type to a mode no wider
> >>  than the original compute type.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>  * gcc.target/aarch64/pr112787.c: New test.
> >>
> >> Bootstrapped and regression tested on aarch64-unknown-linux-gnu and
> >> x86_64-pc-linux-gnu.
> >>
> >> Is this OK for trunk?
> > 
> > @@ -1347,7 +1347,7 @@ optimize_vector_constructor (gimple_stmt_iterator
> > *gsi)
> >      TYPE, or NULL_TREE if none is found.  */
> > 
> > Can you improve the function comment?  It also doesn't mention OP ...
> > 
> >   static tree
> > -type_for_widest_vector_mode (tree type, optab op)
> > +type_for_widest_vector_mode (tree type, optab op, poly_int64 max_nunits =
> > 0)
> >   {
> >     machine_mode inner_mode = TYPE_MODE (type);
> >     machine_mode best_mode = VOIDmode, mode;
> > @@ -1371,7 +1371,9 @@ type_for_widest_vector_mode (tree type, optab op)
> >     FOR_EACH_MODE_FROM (mode, mode)
> >       if (GET_MODE_INNER (mode) == inner_mode
> >          && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
> > -       && optab_handler (op, mode) != CODE_FOR_nothing)
> > +       && optab_handler (op, mode) != CODE_FOR_nothing
> > +       && (known_eq (max_nunits, 0)
> > +           || known_lt (GET_MODE_NUNITS (mode), max_nunits)))
> > 
> > max_nunits suggests that known_le would be appropriate instead.
> > 
> > I see the only other caller with similar "problems":
> > 
> >          }
> >        /* Can't use get_compute_type here, as supportable_convert_operation
> >           doesn't necessarily use an optab and needs two arguments.  */
> >        tree vec_compute_type
> >          = type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
> >        if (vec_compute_type
> >            && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
> >            && subparts_gt (arg_type, vec_compute_type))
> > 
> > so please do not default to 0 but adjust this one as well.  It also
> > seems you then can remove the subparts_gt guards on both
> > vec_compute_type uses.
> > 
> > I think the API would be cleaner if we'd pass the original vector type
> > we can then extract TYPE_VECTOR_SUBPARTS from, avoiding the extra arg.
> > 
> > No?
> > 
> > Thanks,
> > Richard.
>
Andre Vieira (lists) Feb. 19, 2024, 2 p.m. UTC | #4
Hi all,

OK to backport this to gcc-12 and gcc-13? Patch applies cleanly, 
bootstrapped and regression tested on aarch64-unknown-linux-gnu. Only 
change is in the testcase as I had to use -march=armv9-a because 
-march=armv8-a+sve conflicts with -mcpu=neoverse-n2 in previous gcc 
versions.

Kind Regards,
Andre

On 20/12/2023 14:30, Richard Biener wrote:
> On Wed, 20 Dec 2023, Andre Vieira (lists) wrote:
> 
>> Thanks, fully agree with all comments.
>>
>> gcc/ChangeLog:
>>
>> 	PR target/112787
>> 	* tree-vect-generic (type_for_widest_vector_mode): Change function
>>          to use original vector type and check widest vector mode has at most
>> 	         the same number of elements.
>> 	(get_compute_type): Pass original vector type rather than the element
>>          type to type_for_widest_vector_mode and remove now obsolete check
>>          for the number of elements.
> 
> OK.
> 
> Richard.
> 
>> On 07/12/2023 07:45, Richard Biener wrote:
>>> On Wed, 6 Dec 2023, Andre Vieira (lists) wrote:
>>>
>>>> Hi,
>>>>
>>>> This patch addresses the issue reported in PR target/112787 by improving
>>>> the
>>>> compute type selection.  We do this by not considering types with more
>>>> elements
>>>> than the type we are lowering since we'd reject such types anyway.
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>>   PR target/112787
>>>>   * tree-vect-generic (type_for_widest_vector_mode): Add a parameter to
>>>>   control maximum amount of elements in resulting vector mode.
>>>>   (get_compute_type): Restrict vector_compute_type to a mode no wider
>>>>   than the original compute type.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>>
>>>>   * gcc.target/aarch64/pr112787.c: New test.
>>>>
>>>> Bootstrapped and regression tested on aarch64-unknown-linux-gnu and
>>>> x86_64-pc-linux-gnu.
>>>>
>>>> Is this OK for trunk?
>>>
>>> @@ -1347,7 +1347,7 @@ optimize_vector_constructor (gimple_stmt_iterator
>>> *gsi)
>>>       TYPE, or NULL_TREE if none is found.  */
>>>
>>> Can you improve the function comment?  It also doesn't mention OP ...
>>>
>>>    static tree
>>> -type_for_widest_vector_mode (tree type, optab op)
>>> +type_for_widest_vector_mode (tree type, optab op, poly_int64 max_nunits =
>>> 0)
>>>    {
>>>      machine_mode inner_mode = TYPE_MODE (type);
>>>      machine_mode best_mode = VOIDmode, mode;
>>> @@ -1371,7 +1371,9 @@ type_for_widest_vector_mode (tree type, optab op)
>>>      FOR_EACH_MODE_FROM (mode, mode)
>>>        if (GET_MODE_INNER (mode) == inner_mode
>>>           && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
>>> -       && optab_handler (op, mode) != CODE_FOR_nothing)
>>> +       && optab_handler (op, mode) != CODE_FOR_nothing
>>> +       && (known_eq (max_nunits, 0)
>>> +           || known_lt (GET_MODE_NUNITS (mode), max_nunits)))
>>>
>>> max_nunits suggests that known_le would be appropriate instead.
>>>
>>> I see the only other caller with similar "problems":
>>>
>>>           }
>>>         /* Can't use get_compute_type here, as supportable_convert_operation
>>>            doesn't necessarily use an optab and needs two arguments.  */
>>>         tree vec_compute_type
>>>           = type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
>>>         if (vec_compute_type
>>>             && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
>>>             && subparts_gt (arg_type, vec_compute_type))
>>>
>>> so please do not default to 0 but adjust this one as well.  It also
>>> seems you then can remove the subparts_gt guards on both
>>> vec_compute_type uses.
>>>
>>> I think the API would be cleaner if we'd pass the original vector type
>>> we can then extract TYPE_VECTOR_SUBPARTS from, avoiding the extra arg.
>>>
>>> No?
>>>
>>> Thanks,
>>> Richard.
>>
>
Richard Biener Feb. 19, 2024, 2:40 p.m. UTC | #5
On Mon, 19 Feb 2024, Andre Vieira (lists) wrote:

> Hi all,
> 
> OK to backport this to gcc-12 and gcc-13? Patch applies cleanly, bootstrapped
> and regression tested on aarch64-unknown-linux-gnu. Only change is in the
> testcase as I had to use -march=armv9-a because -march=armv8-a+sve conflicts
> with -mcpu=neoverse-n2 in previous gcc versions.

Yes.

Thanks,
Richard.

> Kind Regards,
> Andre
> 
> On 20/12/2023 14:30, Richard Biener wrote:
> > On Wed, 20 Dec 2023, Andre Vieira (lists) wrote:
> > 
> >> Thanks, fully agree with all comments.
> >>
> >> gcc/ChangeLog:
> >>
> >>  PR target/112787
> >>  * tree-vect-generic (type_for_widest_vector_mode): Change function
> >>          to use original vector type and check widest vector mode has at
> >>          most
> >>           the same number of elements.
> >>  (get_compute_type): Pass original vector type rather than the element
> >>          type to type_for_widest_vector_mode and remove now obsolete check
> >>          for the number of elements.
> > 
> > OK.
> > 
> > Richard.
> > 
> >> On 07/12/2023 07:45, Richard Biener wrote:
> >>> On Wed, 6 Dec 2023, Andre Vieira (lists) wrote:
> >>>
> >>>> Hi,
> >>>>
> >>>> This patch addresses the issue reported in PR target/112787 by improving
> >>>> the
> >>>> compute type selection.  We do this by not considering types with more
> >>>> elements
> >>>> than the type we are lowering since we'd reject such types anyway.
> >>>>
> >>>> gcc/ChangeLog:
> >>>>
> >>>>   PR target/112787
> >>>>   * tree-vect-generic (type_for_widest_vector_mode): Add a parameter to
> >>>>   control maximum amount of elements in resulting vector mode.
> >>>>   (get_compute_type): Restrict vector_compute_type to a mode no wider
> >>>>   than the original compute type.
> >>>>
> >>>> gcc/testsuite/ChangeLog:
> >>>>
> >>>>   * gcc.target/aarch64/pr112787.c: New test.
> >>>>
> >>>> Bootstrapped and regression tested on aarch64-unknown-linux-gnu and
> >>>> x86_64-pc-linux-gnu.
> >>>>
> >>>> Is this OK for trunk?
> >>>
> >>> @@ -1347,7 +1347,7 @@ optimize_vector_constructor (gimple_stmt_iterator
> >>> *gsi)
> >>>       TYPE, or NULL_TREE if none is found.  */
> >>>
> >>> Can you improve the function comment?  It also doesn't mention OP ...
> >>>
> >>>    static tree
> >>> -type_for_widest_vector_mode (tree type, optab op)
> >>> +type_for_widest_vector_mode (tree type, optab op, poly_int64 max_nunits =
> >>> 0)
> >>>    {
> >>>      machine_mode inner_mode = TYPE_MODE (type);
> >>>      machine_mode best_mode = VOIDmode, mode;
> >>> @@ -1371,7 +1371,9 @@ type_for_widest_vector_mode (tree type, optab op)
> >>>      FOR_EACH_MODE_FROM (mode, mode)
> >>>        if (GET_MODE_INNER (mode) == inner_mode
> >>>           && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
> >>> -       && optab_handler (op, mode) != CODE_FOR_nothing)
> >>> +       && optab_handler (op, mode) != CODE_FOR_nothing
> >>> +       && (known_eq (max_nunits, 0)
> >>> +           || known_lt (GET_MODE_NUNITS (mode), max_nunits)))
> >>>
> >>> max_nunits suggests that known_le would be appropriate instead.
> >>>
> >>> I see the only other caller with similar "problems":
> >>>
> >>>           }
> >>>         /* Can't use get_compute_type here, as
> >>>         supportable_convert_operation
> >>>            doesn't necessarily use an optab and needs two arguments.  */
> >>>         tree vec_compute_type
> >>>           = type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
> >>>         if (vec_compute_type
> >>>             && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
> >>>             && subparts_gt (arg_type, vec_compute_type))
> >>>
> >>> so please do not default to 0 but adjust this one as well.  It also
> >>> seems you then can remove the subparts_gt guards on both
> >>> vec_compute_type uses.
> >>>
> >>> I think the API would be cleaner if we'd pass the original vector type
> >>> we can then extract TYPE_VECTOR_SUBPARTS from, avoiding the extra arg.
> >>>
> >>> No?
> >>>
> >>> Thanks,
> >>> Richard.
> >>
> > 
> 
>
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.target/aarch64/pr112787.c b/gcc/testsuite/gcc.target/aarch64/pr112787.c
new file mode 100644
index 0000000000000000000000000000000000000000..caca1bf7ef447e4489b2c134d7200a4afd16763f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr112787.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -march=armv8-a+sve -mcpu=neoverse-n2" } */
+
+typedef int __attribute__((__vector_size__ (64))) vec;
+
+vec fn (vec a, vec b)
+{
+  return a + b;
+}
+
+/* { dg-final { scan-assembler-times {add\tv[0-9]+} 4 } } */
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index a7e6cb87a5e31e3dd2a893ea5652eeebf8d5d214..2dbf3c8f5f64f2623944110dbc371fe0944198f0 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -1347,7 +1347,7 @@  optimize_vector_constructor (gimple_stmt_iterator *gsi)
    TYPE, or NULL_TREE if none is found.  */
 
 static tree
-type_for_widest_vector_mode (tree type, optab op)
+type_for_widest_vector_mode (tree type, optab op, poly_int64 max_nunits = 0)
 {
   machine_mode inner_mode = TYPE_MODE (type);
   machine_mode best_mode = VOIDmode, mode;
@@ -1371,7 +1371,9 @@  type_for_widest_vector_mode (tree type, optab op)
   FOR_EACH_MODE_FROM (mode, mode)
     if (GET_MODE_INNER (mode) == inner_mode
 	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
-	&& optab_handler (op, mode) != CODE_FOR_nothing)
+	&& optab_handler (op, mode) != CODE_FOR_nothing
+	&& (known_eq (max_nunits, 0)
+	    || known_lt (GET_MODE_NUNITS (mode), max_nunits)))
       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
 
   if (best_mode == VOIDmode)
@@ -1702,7 +1704,8 @@  get_compute_type (enum tree_code code, optab op, tree type)
 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
     {
       tree vector_compute_type
-	= type_for_widest_vector_mode (TREE_TYPE (type), op);
+	= type_for_widest_vector_mode (TREE_TYPE (type), op,
+				       TYPE_VECTOR_SUBPARTS (compute_type));
       if (vector_compute_type != NULL_TREE
 	  && subparts_gt (compute_type, vector_compute_type)
 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)