diff mbox series

[v2] vect: Merge loop mask and cond_op mask in fold-left reduction [PR115382].

Message ID dbbca315-7bb9-427a-a4a6-d94c5e06d34f@gmail.com
State New
Headers show
Series [v2] vect: Merge loop mask and cond_op mask in fold-left reduction [PR115382]. | expand

Commit Message

Robin Dapp June 10, 2024, 1:31 p.m. UTC
> Actually, as Richard mentioned in the PR, it would probably be better
> to use prepare_vec_mask instead.  It should work in this context too
> and would avoid redundant double masking.

Attached is v2 that uses prepare_vec_mask.

Regtested on riscv64 and armv8.8-a+sve via qemu.
Bootstrap and regtest running on x86 and aarch64.

Regards
 Robin


Currently we discard the cond-op mask when the loop is fully masked
which causes wrong code in
gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c
when compiled with
-O3 -march=cascadelake --param vect-partial-vector-usage=2.

This patch ANDs both masks.

gcc/ChangeLog:

	PR tree-optimization/115382

	* tree-vect-loop.cc (vectorize_fold_left_reduction): Merge loop
	mask and cond-op mask.
---
 gcc/tree-vect-loop.cc  | 10 +++++++++-
 gcc/tree-vect-stmts.cc |  2 +-
 gcc/tree-vectorizer.h  |  2 ++
 3 files changed, 12 insertions(+), 2 deletions(-)

Comments

Richard Sandiford June 10, 2024, 1:58 p.m. UTC | #1
Robin Dapp <rdapp.gcc@gmail.com> writes:
>> Actually, as Richard mentioned in the PR, it would probably be better
>> to use prepare_vec_mask instead.  It should work in this context too
>> and would avoid redundant double masking.
>
> Attached is v2 that uses prepare_vec_mask.
>
> Regtested on riscv64 and armv8.8-a+sve via qemu.
> Bootstrap and regtest running on x86 and aarch64.
>
> Regards
>  Robin
>
>
> Currently we discard the cond-op mask when the loop is fully masked
> which causes wrong code in
> gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c
> when compiled with
> -O3 -march=cascadelake --param vect-partial-vector-usage=2.
>
> This patch ANDs both masks.
>
> gcc/ChangeLog:
>
> 	PR tree-optimization/115382
>
> 	* tree-vect-loop.cc (vectorize_fold_left_reduction): Merge loop
> 	mask and cond-op mask.
> ---
>  gcc/tree-vect-loop.cc  | 10 +++++++++-
>  gcc/tree-vect-stmts.cc |  2 +-
>  gcc/tree-vectorizer.h  |  2 ++
>  3 files changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 028692614bb..c9b037b8daf 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -7215,7 +7215,15 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
>        tree len = NULL_TREE;
>        tree bias = NULL_TREE;
>        if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
> -	mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i);
> +	{
> +	  tree loop_mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
> +					       vec_num, vectype_in, i);
> +	  if (is_cond_op)
> +	    mask = prepare_vec_mask (loop_vinfo, TREE_TYPE (loop_mask),
> +				     loop_mask, vec_opmask[i], gsi);
> +	  else
> +	    mask = loop_mask;
> +	}
>        else if (is_cond_op)
>  	mask = vec_opmask[i];
>        if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 5098b7fab6a..124a3462753 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -1643,7 +1643,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>     MASK_TYPE is the type of both masks.  If new statements are needed,
>     insert them before GSI.  */
>  
> -static tree
> +tree
>  prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
>  		  tree vec_mask, gimple_stmt_iterator *gsi)
>  {
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 97ec9c341e7..1f87c6c8ca2 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2508,6 +2508,8 @@ extern void vect_free_slp_tree (slp_tree);
>  extern bool compatible_calls_p (gcall *, gcall *);
>  extern int vect_slp_child_index_for_operand (const gimple *, int op, bool);
>  
> +extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree, gimple_stmt_iterator *);

Nit: long line.

OK with that fixed, thanks.

Richard

> +
>  /* In tree-vect-patterns.cc.  */
>  extern void
>  vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);
diff mbox series

Patch

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 028692614bb..c9b037b8daf 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7215,7 +7215,15 @@  vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
       tree len = NULL_TREE;
       tree bias = NULL_TREE;
       if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
-	mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i);
+	{
+	  tree loop_mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
+					       vec_num, vectype_in, i);
+	  if (is_cond_op)
+	    mask = prepare_vec_mask (loop_vinfo, TREE_TYPE (loop_mask),
+				     loop_mask, vec_opmask[i], gsi);
+	  else
+	    mask = loop_mask;
+	}
       else if (is_cond_op)
 	mask = vec_opmask[i];
       if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 5098b7fab6a..124a3462753 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1643,7 +1643,7 @@  check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
    MASK_TYPE is the type of both masks.  If new statements are needed,
    insert them before GSI.  */
 
-static tree
+tree
 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
 		  tree vec_mask, gimple_stmt_iterator *gsi)
 {
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 97ec9c341e7..1f87c6c8ca2 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2508,6 +2508,8 @@  extern void vect_free_slp_tree (slp_tree);
 extern bool compatible_calls_p (gcall *, gcall *);
 extern int vect_slp_child_index_for_operand (const gimple *, int op, bool);
 
+extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree, gimple_stmt_iterator *);
+
 /* In tree-vect-patterns.cc.  */
 extern void
 vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);