diff mbox series

Update SLP reductions process.

Message ID 20240718055947.3603177-1-jiawei@iscas.ac.cn
State New
Headers show
Series Update SLP reductions process. | expand

Commit Message

Jiawei July 18, 2024, 5:59 a.m. UTC
This patch improves SLP reduction handling by ensuring proper processing 
even for a single reduction statement.Vectorization instances are now built 
only when there are multiple scalar statements to combine into an SLP
reduction.

An example see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110632,
this patch fix this problem,handling SLP reduction as expected.

gcc/ChangeLog:

	* tree-vect-slp.cc (vect_analyze_slp): Improved handling of SLP reductions
	for single reduction statements.

---
 gcc/tree-vect-slp.cc | 49 +++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 26 deletions(-)

Comments

Richard Biener July 18, 2024, 7:05 a.m. UTC | #1
On Thu, 18 Jul 2024, Jiawei wrote:

> This patch improves SLP reduction handling by ensuring proper processing 
> even for a single reduction statement.Vectorization instances are now built 
> only when there are multiple scalar statements to combine into an SLP
> reduction.
> 
> An example see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110632,
> this patch fix this problem,handling SLP reduction as expected.

That PR is lacking any information so I'm not sure what kind of problem
you are fixing.

But the patch is clearly wrong - we want to use SLP even for single
reduction statements, the non-SLP path is going away.

Richard.

> gcc/ChangeLog:
> 
> 	* tree-vect-slp.cc (vect_analyze_slp): Improved handling of SLP reductions
> 	for single reduction statements.
> 
> ---
>  gcc/tree-vect-slp.cc | 49 +++++++++++++++++++++-----------------------
>  1 file changed, 23 insertions(+), 26 deletions(-)
> 
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 55ae496cbb2..b6dfd9fd32f 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -4054,34 +4054,31 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
>  		}
>  	    }
>  	  /* Save for re-processing on failure.  */
> -	  vec<stmt_vec_info> saved_stmts = scalar_stmts.copy ();
> -	  vec<stmt_vec_info> roots = vNULL;
> -	  vec<tree> remain = vNULL;
> -	  if (scalar_stmts.length () <= 1
> -	      || !vect_build_slp_instance (loop_vinfo,
> -					   slp_inst_kind_reduc_group,
> -					   scalar_stmts, roots, remain,
> -					   max_tree_size, &limit, bst_map,
> -					   NULL))
> -	    {
> -	      if (scalar_stmts.length () <= 1)
> -		scalar_stmts.release ();
> -	      /* Do SLP discovery for single-lane reductions.  */
> -	      for (auto stmt_info : saved_stmts)
> -		{
> -		  vec<stmt_vec_info> stmts;
> -		  vec<stmt_vec_info> roots = vNULL;
> -		  vec<tree> remain = vNULL;
> -		  stmts.create (1);
> -		  stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
> -		  vect_build_slp_instance (vinfo,
> -					   slp_inst_kind_reduc_group,
> -					   stmts, roots, remain,
> -					   max_tree_size, &limit,
> -					   bst_map, NULL);
> +	  if (loop_vinfo->reductions.length() > 0) {
> +	    vec<stmt_vec_info> scalar_stmts;
> +	    scalar_stmts.create(loop_vinfo->reductions.length());
> +
> +	    for (auto next_info : loop_vinfo->reductions) {
> +	      if (STMT_VINFO_DEF_TYPE(next_info) == vect_reduction_def) {
> +		gassign *g = dyn_cast<gassign*>(STMT_VINFO_STMT(next_info));
> +		if (!g || !lane_reducing_op_p(gimple_assign_rhs_code(g))) {
> +		  scalar_stmts.quick_push(next_info);
>  		}
> -	      saved_stmts.release ();
> +	      }
>  	    }
> +
> +	  if (scalar_stmts.length() > 1) {
> +	    vec<stmt_vec_info> roots = vNULL;
> +	    vec<tree> remain = vNULL;
> +	    if (!vect_build_slp_instance(loop_vinfo, slp_inst_kind_reduc_group, scalar_stmts, roots, remain, max_tree_size, &limit, bst_map, NULL)) {
> +		scalar_stmts.release();
> +	    }
> +	    }
> +	  else {
> +	    scalar_stmts.release();
> +	  }
> +	  }
> +
>  	}
>      }
>  
>
Jiawei July 18, 2024, 8:07 a.m. UTC | #2
Thanks for your quick reply,sorry for the missing of information.

I meet this problem in risc-v test:

gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c

I found that this SLP change will add additional instrutions in the 
test, please see this link:

https://godbolt.org/z/5Tfqs9zqj


在 2024/07/18 15:05, Richard Biener 写道:
> On Thu, 18 Jul 2024, Jiawei wrote:
>
>> This patch improves SLP reduction handling by ensuring proper processing
>> even for a single reduction statement.Vectorization instances are now built
>> only when there are multiple scalar statements to combine into an SLP
>> reduction.
>>
>> An example seehttps://gcc.gnu.org/bugzilla/show_bug.cgi?id=110632,
>> this patch fix this problem,handling SLP reduction as expected.
> That PR is lacking any information so I'm not sure what kind of problem
> you are fixing.
>
> But the patch is clearly wrong - we want to use SLP even for single
> reduction statements, the non-SLP path is going away.

I located the problem from the following modifications and made some 
attempts.

But it seems wrong, do you have any suggestions?

-         if (scalar_stmts.length () > 1)
+         /* Save for re-processing on failure.  */
+         vec<stmt_vec_info> saved_stmts = scalar_stmts.copy ();
+         vec<stmt_vec_info> roots = vNULL;
+         vec<tree> remain = vNULL;
+         if (scalar_stmts.length () <= 1
+             || !vect_build_slp_instance (loop_vinfo,
+ slp_inst_kind_reduc_group,
+                                          scalar_stmts, roots, remain,
+                                          max_tree_size, &limit, bst_map,
+                                          NULL))
             {
-             vec<stmt_vec_info> roots = vNULL;
-             vec<tree> remain = vNULL;
-             vect_build_slp_instance (loop_vinfo, 
slp_inst_kind_reduc_group,
-                                      scalar_stmts, roots, remain,

-                                      max_tree_size, &limit, bst_map, 
NULL);

BR,

Jiawei

>
> Richard.
>
>> gcc/ChangeLog:
>>
>> 	* tree-vect-slp.cc (vect_analyze_slp): Improved handling of SLP reductions
>> 	for single reduction statements.
>>
>> ---
>>   gcc/tree-vect-slp.cc | 49 +++++++++++++++++++++-----------------------
>>   1 file changed, 23 insertions(+), 26 deletions(-)
>>
>> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
>> index 55ae496cbb2..b6dfd9fd32f 100644
>> --- a/gcc/tree-vect-slp.cc
>> +++ b/gcc/tree-vect-slp.cc
>> @@ -4054,34 +4054,31 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
>>   		}
>>   	    }
>>   	  /* Save for re-processing on failure.  */
>> -	  vec<stmt_vec_info> saved_stmts = scalar_stmts.copy ();
>> -	  vec<stmt_vec_info> roots = vNULL;
>> -	  vec<tree> remain = vNULL;
>> -	  if (scalar_stmts.length () <= 1
>> -	      || !vect_build_slp_instance (loop_vinfo,
>> -					   slp_inst_kind_reduc_group,
>> -					   scalar_stmts, roots, remain,
>> -					   max_tree_size, &limit, bst_map,
>> -					   NULL))
>> -	    {
>> -	      if (scalar_stmts.length () <= 1)
>> -		scalar_stmts.release ();
>> -	      /* Do SLP discovery for single-lane reductions.  */
>> -	      for (auto stmt_info : saved_stmts)
>> -		{
>> -		  vec<stmt_vec_info> stmts;
>> -		  vec<stmt_vec_info> roots = vNULL;
>> -		  vec<tree> remain = vNULL;
>> -		  stmts.create (1);
>> -		  stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
>> -		  vect_build_slp_instance (vinfo,
>> -					   slp_inst_kind_reduc_group,
>> -					   stmts, roots, remain,
>> -					   max_tree_size, &limit,
>> -					   bst_map, NULL);
>> +	  if (loop_vinfo->reductions.length() > 0) {
>> +	    vec<stmt_vec_info> scalar_stmts;
>> +	    scalar_stmts.create(loop_vinfo->reductions.length());
>> +
>> +	    for (auto next_info : loop_vinfo->reductions) {
>> +	      if (STMT_VINFO_DEF_TYPE(next_info) == vect_reduction_def) {
>> +		gassign *g = dyn_cast<gassign*>(STMT_VINFO_STMT(next_info));
>> +		if (!g || !lane_reducing_op_p(gimple_assign_rhs_code(g))) {
>> +		  scalar_stmts.quick_push(next_info);
>>   		}
>> -	      saved_stmts.release ();
>> +	      }
>>   	    }
>> +
>> +	  if (scalar_stmts.length() > 1) {
>> +	    vec<stmt_vec_info> roots = vNULL;
>> +	    vec<tree> remain = vNULL;
>> +	    if (!vect_build_slp_instance(loop_vinfo, slp_inst_kind_reduc_group, scalar_stmts, roots, remain, max_tree_size, &limit, bst_map, NULL)) {
>> +		scalar_stmts.release();
>> +	    }
>> +	    }
>> +	  else {
>> +	    scalar_stmts.release();
>> +	  }
>> +	  }
>> +
>>   	}
>>       }
>>   
>>
Richard Biener July 18, 2024, 8:27 a.m. UTC | #3
On Thu, 18 Jul 2024, Jiawei wrote:

> Thanks for your quick reply,sorry for the missing of information.
> 
> I meet this problem in risc-v test:
> 
> gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c
> 
> I found that this SLP change will add additional instrutions in the test,
> please see this link:
> 
> https://godbolt.org/z/5Tfqs9zqj
> 
> 
> 在 2024/07/18 15:05, Richard Biener 写道:
> > On Thu, 18 Jul 2024, Jiawei wrote:
> >
> >> This patch improves SLP reduction handling by ensuring proper processing
> >> even for a single reduction statement.Vectorization instances are now built
> >> only when there are multiple scalar statements to combine into an SLP
> >> reduction.
> >>
> >> An example seehttps://gcc.gnu.org/bugzilla/show_bug.cgi?id=110632,
> >> this patch fix this problem,handling SLP reduction as expected.
> > That PR is lacking any information so I'm not sure what kind of problem
> > you are fixing.
> >
> > But the patch is clearly wrong - we want to use SLP even for single
> > reduction statements, the non-SLP path is going away.
> 
> I located the problem from the following modifications and made some attempts.
> 
> But it seems wrong, do you have any suggestions?

You have to analyze why there are additional instructions when we
SLP vectorize the reduction compared to using non-SLP.  I do not
know what those extra instructions do and thus have no suggestion
where to particularly look at.

Btw, the change was made explicitly to uncover this kind of problems
early.

Richard.

> -         if (scalar_stmts.length () > 1)
> +         /* Save for re-processing on failure.  */
> +         vec<stmt_vec_info> saved_stmts = scalar_stmts.copy ();
> +         vec<stmt_vec_info> roots = vNULL;
> +         vec<tree> remain = vNULL;
> +         if (scalar_stmts.length () <= 1
> +             || !vect_build_slp_instance (loop_vinfo,
> + slp_inst_kind_reduc_group,
> +                                          scalar_stmts, roots, remain,
> +                                          max_tree_size, &limit, bst_map,
> +                                          NULL))
>             {
> -             vec<stmt_vec_info> roots = vNULL;
> -             vec<tree> remain = vNULL;
> -             vect_build_slp_instance (loop_vinfo, slp_inst_kind_reduc_group,
> -                                      scalar_stmts, roots, remain,
> 
> -                                      max_tree_size, &limit, bst_map, NULL);
> 
> BR,
> 
> Jiawei
> 
> >
> > Richard.
> >
> >> gcc/ChangeLog:
> >>
> >>  * tree-vect-slp.cc (vect_analyze_slp): Improved handling of SLP reductions
> >>  for single reduction statements.
> >>
> >> ---
> >>   gcc/tree-vect-slp.cc | 49 +++++++++++++++++++++-----------------------
> >>   1 file changed, 23 insertions(+), 26 deletions(-)
> >>
> >> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> >> index 55ae496cbb2..b6dfd9fd32f 100644
> >> --- a/gcc/tree-vect-slp.cc
> >> +++ b/gcc/tree-vect-slp.cc
> >> @@ -4054,34 +4054,31 @@ vect_analyze_slp (vec_info *vinfo, unsigned
> >> @@ max_tree_size)
> >>        	}
> >>        }
> >>   	  /* Save for re-processing on failure.  */
> >> -	  vec<stmt_vec_info> saved_stmts = scalar_stmts.copy ();
> >> -	  vec<stmt_vec_info> roots = vNULL;
> >> -	  vec<tree> remain = vNULL;
> >> -	  if (scalar_stmts.length () <= 1
> >> -	      || !vect_build_slp_instance (loop_vinfo,
> >> -					   slp_inst_kind_reduc_group,
> >> -					   scalar_stmts, roots, remain,
> >> -					   max_tree_size, &limit, bst_map,
> >> -					   NULL))
> >> -	    {
> >> -	      if (scalar_stmts.length () <= 1)
> >> -		scalar_stmts.release ();
> >> -	      /* Do SLP discovery for single-lane reductions.  */
> >> -	      for (auto stmt_info : saved_stmts)
> >> -		{
> >> -		  vec<stmt_vec_info> stmts;
> >> -		  vec<stmt_vec_info> roots = vNULL;
> >> -		  vec<tree> remain = vNULL;
> >> -		  stmts.create (1);
> >> -		  stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
> >> -		  vect_build_slp_instance (vinfo,
> >> -					   slp_inst_kind_reduc_group,
> >> -					   stmts, roots, remain,
> >> -					   max_tree_size, &limit,
> >> -					   bst_map, NULL);
> >> +	  if (loop_vinfo->reductions.length() > 0) {
> >> +	    vec<stmt_vec_info> scalar_stmts;
> >> +	    scalar_stmts.create(loop_vinfo->reductions.length());
> >> +
> >> +	    for (auto next_info : loop_vinfo->reductions) {
> >> +	      if (STMT_VINFO_DEF_TYPE(next_info) == vect_reduction_def) {
> >> +		gassign *g = dyn_cast<gassign*>(STMT_VINFO_STMT(next_info));
> >> +		if (!g || !lane_reducing_op_p(gimple_assign_rhs_code(g))) {
> >> +		  scalar_stmts.quick_push(next_info);
> >>   		}
> >> -	      saved_stmts.release ();
> >> +	      }
> >>   	    }
> >> +
> >> +	  if (scalar_stmts.length() > 1) {
> >> +	    vec<stmt_vec_info> roots = vNULL;
> >> +	    vec<tree> remain = vNULL;
> >> +	    if (!vect_build_slp_instance(loop_vinfo,
> >> slp_inst_kind_reduc_group, scalar_stmts, roots, remain, max_tree_size,
> >> &limit, bst_map, NULL)) {
> >> +		scalar_stmts.release();
> >> +	    }
> >> +	    }
> >> +	  else {
> >> +	    scalar_stmts.release();
> >> +	  }
> >> +	  }
> >> +
> >>    }
> >>       }
> >>   
> >>
>
diff mbox series

Patch

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 55ae496cbb2..b6dfd9fd32f 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4054,34 +4054,31 @@  vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
 		}
 	    }
 	  /* Save for re-processing on failure.  */
-	  vec<stmt_vec_info> saved_stmts = scalar_stmts.copy ();
-	  vec<stmt_vec_info> roots = vNULL;
-	  vec<tree> remain = vNULL;
-	  if (scalar_stmts.length () <= 1
-	      || !vect_build_slp_instance (loop_vinfo,
-					   slp_inst_kind_reduc_group,
-					   scalar_stmts, roots, remain,
-					   max_tree_size, &limit, bst_map,
-					   NULL))
-	    {
-	      if (scalar_stmts.length () <= 1)
-		scalar_stmts.release ();
-	      /* Do SLP discovery for single-lane reductions.  */
-	      for (auto stmt_info : saved_stmts)
-		{
-		  vec<stmt_vec_info> stmts;
-		  vec<stmt_vec_info> roots = vNULL;
-		  vec<tree> remain = vNULL;
-		  stmts.create (1);
-		  stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
-		  vect_build_slp_instance (vinfo,
-					   slp_inst_kind_reduc_group,
-					   stmts, roots, remain,
-					   max_tree_size, &limit,
-					   bst_map, NULL);
+	  if (loop_vinfo->reductions.length() > 0) {
+	    vec<stmt_vec_info> scalar_stmts;
+	    scalar_stmts.create(loop_vinfo->reductions.length());
+
+	    for (auto next_info : loop_vinfo->reductions) {
+	      if (STMT_VINFO_DEF_TYPE(next_info) == vect_reduction_def) {
+		gassign *g = dyn_cast<gassign*>(STMT_VINFO_STMT(next_info));
+		if (!g || !lane_reducing_op_p(gimple_assign_rhs_code(g))) {
+		  scalar_stmts.quick_push(next_info);
 		}
-	      saved_stmts.release ();
+	      }
 	    }
+
+	  if (scalar_stmts.length() > 1) {
+	    vec<stmt_vec_info> roots = vNULL;
+	    vec<tree> remain = vNULL;
+	    if (!vect_build_slp_instance(loop_vinfo, slp_inst_kind_reduc_group, scalar_stmts, roots, remain, max_tree_size, &limit, bst_map, NULL)) {
+		scalar_stmts.release();
+	    }
+	    }
+	  else {
+	    scalar_stmts.release();
+	  }
+	  }
+
 	}
     }