diff mbox series

rs6000: Fix vector_set_var_p9 by considering BE [PR108807]

Message ID 737a5392-29f8-763c-8dc7-b48c36edb1a7@linux.ibm.com
State New
Headers show
Series rs6000: Fix vector_set_var_p9 by considering BE [PR108807] | expand

Commit Message

Kewen.Lin Feb. 17, 2023, 9:55 a.m. UTC
Hi,

As PR108807 exposes, the current handling in function
rs6000_expand_vector_set_var_p9 doesn't take care of big
endianness.  Currently the function is to rotate the
target vector by moving element to-be-set to element 0,
set element 0 with the given val, then rotate back.  To
get the permutation control vector for the rotation, it
makes use of lvsr and lvsl, but the element ordering is
different for BE and LE (like element 0 is the most
significant one on BE while the least significant one on
LE), this patch is to add consideration for BE and make
sure permutation control vectors for rotations are expected.

As tested, it helped to fix the below failures:

FAIL: gcc.target/powerpc/pr79251-run.p9.c execution test
FAIL: gcc.target/powerpc/pr89765-mc.c execution test
FAIL: gcc.target/powerpc/vsx-builtin-10d.c execution test
FAIL: gcc.target/powerpc/vsx-builtin-11d.c execution test
FAIL: gcc.target/powerpc/vsx-builtin-14d.c execution test
FAIL: gcc.target/powerpc/vsx-builtin-16d.c execution test
FAIL: gcc.target/powerpc/vsx-builtin-18d.c execution test
FAIL: gcc.target/powerpc/vsx-builtin-9d.c execution test

Bootstrapped and regtested on powerpc64-linux-gnu P{8,9}
and powerpc64le-linux-gnu P10.

Is it ok for trunk?

BR,
Kewen
-----
	PR target/108807

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_expand_vector_set_var_p9): Fix gen
	function for permutation control vector by considering big endianness.
---
 gcc/config/rs6000/rs6000.cc | 48 +++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 20 deletions(-)

--
2.39.1

Comments

Kewen.Lin March 20, 2023, 6:35 a.m. UTC | #1
Hi,

I'd like to gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612213.html

It's to fix one regression, I think it's stage 4 content.

BR,
Kewen

on 2023/2/17 17:55, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> As PR108807 exposes, the current handling in function
> rs6000_expand_vector_set_var_p9 doesn't take care of big
> endianness.  Currently the function is to rotate the
> target vector by moving element to-be-set to element 0,
> set element 0 with the given val, then rotate back.  To
> get the permutation control vector for the rotation, it
> makes use of lvsr and lvsl, but the element ordering is
> different for BE and LE (like element 0 is the most
> significant one on BE while the least significant one on
> LE), this patch is to add consideration for BE and make
> sure permutation control vectors for rotations are expected.
> 
> As tested, it helped to fix the below failures:
> 
> FAIL: gcc.target/powerpc/pr79251-run.p9.c execution test
> FAIL: gcc.target/powerpc/pr89765-mc.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-10d.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-11d.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-14d.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-16d.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-18d.c execution test
> FAIL: gcc.target/powerpc/vsx-builtin-9d.c execution test
> 
> Bootstrapped and regtested on powerpc64-linux-gnu P{8,9}
> and powerpc64le-linux-gnu P10.
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -----
> 	PR target/108807
> 
> gcc/ChangeLog:
> 
> 	* config/rs6000/rs6000.cc (rs6000_expand_vector_set_var_p9): Fix gen
> 	function for permutation control vector by considering big endianness.
> ---
>  gcc/config/rs6000/rs6000.cc | 48 +++++++++++++++++++++----------------
>  1 file changed, 28 insertions(+), 20 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 16ca3a31757..774eb2963d9 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -7235,22 +7235,26 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
> 
>    machine_mode shift_mode;
>    rtx (*gen_ashl)(rtx, rtx, rtx);
> -  rtx (*gen_lvsl)(rtx, rtx);
> -  rtx (*gen_lvsr)(rtx, rtx);
> +  rtx (*gen_pcvr1)(rtx, rtx);
> +  rtx (*gen_pcvr2)(rtx, rtx);
> 
>    if (TARGET_POWERPC64)
>      {
>        shift_mode = DImode;
>        gen_ashl = gen_ashldi3;
> -      gen_lvsl = gen_altivec_lvsl_reg_di;
> -      gen_lvsr = gen_altivec_lvsr_reg_di;
> +      gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
> +				   : gen_altivec_lvsr_reg_di;
> +      gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
> +				   : gen_altivec_lvsl_reg_di;
>      }
>    else
>      {
>        shift_mode = SImode;
>        gen_ashl = gen_ashlsi3;
> -      gen_lvsl = gen_altivec_lvsl_reg_si;
> -      gen_lvsr = gen_altivec_lvsr_reg_si;
> +      gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
> +				   : gen_altivec_lvsr_reg_si;
> +      gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
> +				   : gen_altivec_lvsl_reg_si;
>      }
>    /* Generate the IDX for permute shift, width is the vector element size.
>       idx = idx * width.  */
> @@ -7259,25 +7263,29 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
> 
>    emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
> 
> -  /*  lvsr    v1,0,idx.  */
> -  rtx pcvr = gen_reg_rtx (V16QImode);
> -  emit_insn (gen_lvsr (pcvr, tmp));
> -
> -  /*  lvsl    v2,0,idx.  */
> -  rtx pcvl = gen_reg_rtx (V16QImode);
> -  emit_insn (gen_lvsl (pcvl, tmp));
> +  /* Generate one permutation control vector used for rotating the element
> +     at to-insert position to element zero in target vector.  lvsl is
> +     used for big endianness while lvsr is used for little endianness:
> +     lvs[lr]    v1,0,idx.  */
> +  rtx pcvr1 = gen_reg_rtx (V16QImode);
> +  emit_insn (gen_pcvr1 (pcvr1, tmp));
> 
>    rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
> +  rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
> +					   pcvr1);
> +  emit_insn (perm1);
> 
> -  rtx permr
> -    = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr);
> -  emit_insn (permr);
> -
> +  /* Insert val into element 0 of target vector.  */
>    rs6000_expand_vector_set (target, val, const0_rtx);
> 
> -  rtx perml
> -    = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl);
> -  emit_insn (perml);
> +  /* Rotate back with a reversed permutation control vector generated from:
> +     lvs[rl]   v2,0,idx.  */
> +  rtx pcvr2 = gen_reg_rtx (V16QImode);
> +  emit_insn (gen_pcvr2 (pcvr2, tmp));
> +
> +  rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
> +					   pcvr2);
> +  emit_insn (perm2);
>  }
> 
>  /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
> --
> 2.39.1
Segher Boessenkool April 3, 2023, 11:44 a.m. UTC | #2
Hi!

On Fri, Feb 17, 2023 at 05:55:04PM +0800, Kewen.Lin wrote:
> As PR108807 exposes, the current handling in function
> rs6000_expand_vector_set_var_p9 doesn't take care of big
> endianness.  Currently the function is to rotate the
> target vector by moving element to-be-set to element 0,
> set element 0 with the given val, then rotate back.  To
> get the permutation control vector for the rotation, it
> makes use of lvsr and lvsl, but the element ordering is
> different for BE and LE (like element 0 is the most
> significant one on BE while the least significant one on
> LE), this patch is to add consideration for BE and make
> sure permutation control vectors for rotations are expected.

> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -7235,22 +7235,26 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
> 
>    machine_mode shift_mode;
>    rtx (*gen_ashl)(rtx, rtx, rtx);
> -  rtx (*gen_lvsl)(rtx, rtx);
> -  rtx (*gen_lvsr)(rtx, rtx);
> +  rtx (*gen_pcvr1)(rtx, rtx);
> +  rtx (*gen_pcvr2)(rtx, rtx);

Space before "(" btw, you can fix that at the same time? :-)

What does "pcvr" mean?  You could put that in a short comment?

> +  /* Generate one permutation control vector used for rotating the element

Ah.  Yeah just "/* Permutation control vector */" for the above one
prevents all mysteries :-)

Patch looks good.  Thanks!


Segher
Kewen.Lin April 4, 2023, 5:19 a.m. UTC | #3
Hi Segher,

Thanks for the review!

on 2023/4/3 19:44, Segher Boessenkool wrote:
> Hi!
> 
> On Fri, Feb 17, 2023 at 05:55:04PM +0800, Kewen.Lin wrote:
>> As PR108807 exposes, the current handling in function
>> rs6000_expand_vector_set_var_p9 doesn't take care of big
>> endianness.  Currently the function is to rotate the
>> target vector by moving element to-be-set to element 0,
>> set element 0 with the given val, then rotate back.  To
>> get the permutation control vector for the rotation, it
>> makes use of lvsr and lvsl, but the element ordering is
>> different for BE and LE (like element 0 is the most
>> significant one on BE while the least significant one on
>> LE), this patch is to add consideration for BE and make
>> sure permutation control vectors for rotations are expected.
> 
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -7235,22 +7235,26 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
>>
>>    machine_mode shift_mode;
>>    rtx (*gen_ashl)(rtx, rtx, rtx);
>> -  rtx (*gen_lvsl)(rtx, rtx);
>> -  rtx (*gen_lvsr)(rtx, rtx);
>> +  rtx (*gen_pcvr1)(rtx, rtx);
>> +  rtx (*gen_pcvr2)(rtx, rtx);
> 
> Space before "(" btw, you can fix that at the same time? :-)
> 

Good catch, fixed.

> What does "pcvr" mean?  You could put that in a short comment?
> 
>> +  /* Generate one permutation control vector used for rotating the element
> 
> Ah.  Yeah just "/* Permutation control vector */" for the above one
> prevents all mysteries :-)

One comment line added for gen_* function pointers.

> 
> Patch looks good.  Thanks!
> 

Pushed as r13-6994-gd634e6088f139e, thanks!

BR,
Kewen
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 16ca3a31757..774eb2963d9 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7235,22 +7235,26 @@  rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)

   machine_mode shift_mode;
   rtx (*gen_ashl)(rtx, rtx, rtx);
-  rtx (*gen_lvsl)(rtx, rtx);
-  rtx (*gen_lvsr)(rtx, rtx);
+  rtx (*gen_pcvr1)(rtx, rtx);
+  rtx (*gen_pcvr2)(rtx, rtx);

   if (TARGET_POWERPC64)
     {
       shift_mode = DImode;
       gen_ashl = gen_ashldi3;
-      gen_lvsl = gen_altivec_lvsl_reg_di;
-      gen_lvsr = gen_altivec_lvsr_reg_di;
+      gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
+				   : gen_altivec_lvsr_reg_di;
+      gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
+				   : gen_altivec_lvsl_reg_di;
     }
   else
     {
       shift_mode = SImode;
       gen_ashl = gen_ashlsi3;
-      gen_lvsl = gen_altivec_lvsl_reg_si;
-      gen_lvsr = gen_altivec_lvsr_reg_si;
+      gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
+				   : gen_altivec_lvsr_reg_si;
+      gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
+				   : gen_altivec_lvsl_reg_si;
     }
   /* Generate the IDX for permute shift, width is the vector element size.
      idx = idx * width.  */
@@ -7259,25 +7263,29 @@  rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)

   emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));

-  /*  lvsr    v1,0,idx.  */
-  rtx pcvr = gen_reg_rtx (V16QImode);
-  emit_insn (gen_lvsr (pcvr, tmp));
-
-  /*  lvsl    v2,0,idx.  */
-  rtx pcvl = gen_reg_rtx (V16QImode);
-  emit_insn (gen_lvsl (pcvl, tmp));
+  /* Generate one permutation control vector used for rotating the element
+     at to-insert position to element zero in target vector.  lvsl is
+     used for big endianness while lvsr is used for little endianness:
+     lvs[lr]    v1,0,idx.  */
+  rtx pcvr1 = gen_reg_rtx (V16QImode);
+  emit_insn (gen_pcvr1 (pcvr1, tmp));

   rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
+  rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
+					   pcvr1);
+  emit_insn (perm1);

-  rtx permr
-    = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr);
-  emit_insn (permr);
-
+  /* Insert val into element 0 of target vector.  */
   rs6000_expand_vector_set (target, val, const0_rtx);

-  rtx perml
-    = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl);
-  emit_insn (perml);
+  /* Rotate back with a reversed permutation control vector generated from:
+     lvs[rl]   v2,0,idx.  */
+  rtx pcvr2 = gen_reg_rtx (V16QImode);
+  emit_insn (gen_pcvr2 (pcvr2, tmp));
+
+  rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
+					   pcvr2);
+  emit_insn (perm2);
 }

 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX