Message ID | 737a5392-29f8-763c-8dc7-b48c36edb1a7@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | rs6000: Fix vector_set_var_p9 by considering BE [PR108807] | expand |
Hi, I'd like to gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612213.html It's to fix one regression, I think it's stage 4 content. BR, Kewen on 2023/2/17 17:55, Kewen.Lin via Gcc-patches wrote: > Hi, > > As PR108807 exposes, the current handling in function > rs6000_expand_vector_set_var_p9 doesn't take care of big > endianness. Currently the function is to rotate the > target vector by moving element to-be-set to element 0, > set element 0 with the given val, then rotate back. To > get the permutation control vector for the rotation, it > makes use of lvsr and lvsl, but the element ordering is > different for BE and LE (like element 0 is the most > significant one on BE while the least significant one on > LE), this patch is to add consideration for BE and make > sure permutation control vectors for rotations are expected. > > As tested, it helped to fix the below failures: > > FAIL: gcc.target/powerpc/pr79251-run.p9.c execution test > FAIL: gcc.target/powerpc/pr89765-mc.c execution test > FAIL: gcc.target/powerpc/vsx-builtin-10d.c execution test > FAIL: gcc.target/powerpc/vsx-builtin-11d.c execution test > FAIL: gcc.target/powerpc/vsx-builtin-14d.c execution test > FAIL: gcc.target/powerpc/vsx-builtin-16d.c execution test > FAIL: gcc.target/powerpc/vsx-builtin-18d.c execution test > FAIL: gcc.target/powerpc/vsx-builtin-9d.c execution test > > Bootstrapped and regtested on powerpc64-linux-gnu P{8,9} > and powerpc64le-linux-gnu P10. > > Is it ok for trunk? > > BR, > Kewen > ----- > PR target/108807 > > gcc/ChangeLog: > > * config/rs6000/rs6000.cc (rs6000_expand_vector_set_var_p9): Fix gen > function for permutation control vector by considering big endianness. > --- > gcc/config/rs6000/rs6000.cc | 48 +++++++++++++++++++++---------------- > 1 file changed, 28 insertions(+), 20 deletions(-) > > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index 16ca3a31757..774eb2963d9 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -7235,22 +7235,26 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) > > machine_mode shift_mode; > rtx (*gen_ashl)(rtx, rtx, rtx); > - rtx (*gen_lvsl)(rtx, rtx); > - rtx (*gen_lvsr)(rtx, rtx); > + rtx (*gen_pcvr1)(rtx, rtx); > + rtx (*gen_pcvr2)(rtx, rtx); > > if (TARGET_POWERPC64) > { > shift_mode = DImode; > gen_ashl = gen_ashldi3; > - gen_lvsl = gen_altivec_lvsl_reg_di; > - gen_lvsr = gen_altivec_lvsr_reg_di; > + gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di > + : gen_altivec_lvsr_reg_di; > + gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di > + : gen_altivec_lvsl_reg_di; > } > else > { > shift_mode = SImode; > gen_ashl = gen_ashlsi3; > - gen_lvsl = gen_altivec_lvsl_reg_si; > - gen_lvsr = gen_altivec_lvsr_reg_si; > + gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si > + : gen_altivec_lvsr_reg_si; > + gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si > + : gen_altivec_lvsl_reg_si; > } > /* Generate the IDX for permute shift, width is the vector element size. > idx = idx * width. */ > @@ -7259,25 +7263,29 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) > > emit_insn (gen_ashl (tmp, idx, GEN_INT (shift))); > > - /* lvsr v1,0,idx. */ > - rtx pcvr = gen_reg_rtx (V16QImode); > - emit_insn (gen_lvsr (pcvr, tmp)); > - > - /* lvsl v2,0,idx. */ > - rtx pcvl = gen_reg_rtx (V16QImode); > - emit_insn (gen_lvsl (pcvl, tmp)); > + /* Generate one permutation control vector used for rotating the element > + at to-insert position to element zero in target vector. lvsl is > + used for big endianness while lvsr is used for little endianness: > + lvs[lr] v1,0,idx. */ > + rtx pcvr1 = gen_reg_rtx (V16QImode); > + emit_insn (gen_pcvr1 (pcvr1, tmp)); > > rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0); > + rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, > + pcvr1); > + emit_insn (perm1); > > - rtx permr > - = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr); > - emit_insn (permr); > - > + /* Insert val into element 0 of target vector. */ > rs6000_expand_vector_set (target, val, const0_rtx); > > - rtx perml > - = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl); > - emit_insn (perml); > + /* Rotate back with a reversed permutation control vector generated from: > + lvs[rl] v2,0,idx. */ > + rtx pcvr2 = gen_reg_rtx (V16QImode); > + emit_insn (gen_pcvr2 (pcvr2, tmp)); > + > + rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, > + pcvr2); > + emit_insn (perm2); > } > > /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX > -- > 2.39.1
Hi! On Fri, Feb 17, 2023 at 05:55:04PM +0800, Kewen.Lin wrote: > As PR108807 exposes, the current handling in function > rs6000_expand_vector_set_var_p9 doesn't take care of big > endianness. Currently the function is to rotate the > target vector by moving element to-be-set to element 0, > set element 0 with the given val, then rotate back. To > get the permutation control vector for the rotation, it > makes use of lvsr and lvsl, but the element ordering is > different for BE and LE (like element 0 is the most > significant one on BE while the least significant one on > LE), this patch is to add consideration for BE and make > sure permutation control vectors for rotations are expected. > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -7235,22 +7235,26 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) > > machine_mode shift_mode; > rtx (*gen_ashl)(rtx, rtx, rtx); > - rtx (*gen_lvsl)(rtx, rtx); > - rtx (*gen_lvsr)(rtx, rtx); > + rtx (*gen_pcvr1)(rtx, rtx); > + rtx (*gen_pcvr2)(rtx, rtx); Space before "(" btw, you can fix that at the same time? :-) What does "pcvr" mean? You could put that in a short comment? > + /* Generate one permutation control vector used for rotating the element Ah. Yeah just "/* Permutation control vector */" for the above one prevents all mysteries :-) Patch looks good. Thanks! Segher
Hi Segher, Thanks for the review! on 2023/4/3 19:44, Segher Boessenkool wrote: > Hi! > > On Fri, Feb 17, 2023 at 05:55:04PM +0800, Kewen.Lin wrote: >> As PR108807 exposes, the current handling in function >> rs6000_expand_vector_set_var_p9 doesn't take care of big >> endianness. Currently the function is to rotate the >> target vector by moving element to-be-set to element 0, >> set element 0 with the given val, then rotate back. To >> get the permutation control vector for the rotation, it >> makes use of lvsr and lvsl, but the element ordering is >> different for BE and LE (like element 0 is the most >> significant one on BE while the least significant one on >> LE), this patch is to add consideration for BE and make >> sure permutation control vectors for rotations are expected. > >> --- a/gcc/config/rs6000/rs6000.cc >> +++ b/gcc/config/rs6000/rs6000.cc >> @@ -7235,22 +7235,26 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) >> >> machine_mode shift_mode; >> rtx (*gen_ashl)(rtx, rtx, rtx); >> - rtx (*gen_lvsl)(rtx, rtx); >> - rtx (*gen_lvsr)(rtx, rtx); >> + rtx (*gen_pcvr1)(rtx, rtx); >> + rtx (*gen_pcvr2)(rtx, rtx); > > Space before "(" btw, you can fix that at the same time? :-) > Good catch, fixed. > What does "pcvr" mean? You could put that in a short comment? > >> + /* Generate one permutation control vector used for rotating the element > > Ah. Yeah just "/* Permutation control vector */" for the above one > prevents all mysteries :-) One comment line added for gen_* function pointers. > > Patch looks good. Thanks! > Pushed as r13-6994-gd634e6088f139e, thanks! BR, Kewen
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 16ca3a31757..774eb2963d9 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -7235,22 +7235,26 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) machine_mode shift_mode; rtx (*gen_ashl)(rtx, rtx, rtx); - rtx (*gen_lvsl)(rtx, rtx); - rtx (*gen_lvsr)(rtx, rtx); + rtx (*gen_pcvr1)(rtx, rtx); + rtx (*gen_pcvr2)(rtx, rtx); if (TARGET_POWERPC64) { shift_mode = DImode; gen_ashl = gen_ashldi3; - gen_lvsl = gen_altivec_lvsl_reg_di; - gen_lvsr = gen_altivec_lvsr_reg_di; + gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di + : gen_altivec_lvsr_reg_di; + gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di + : gen_altivec_lvsl_reg_di; } else { shift_mode = SImode; gen_ashl = gen_ashlsi3; - gen_lvsl = gen_altivec_lvsl_reg_si; - gen_lvsr = gen_altivec_lvsr_reg_si; + gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si + : gen_altivec_lvsr_reg_si; + gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si + : gen_altivec_lvsl_reg_si; } /* Generate the IDX for permute shift, width is the vector element size. idx = idx * width. */ @@ -7259,25 +7263,29 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) emit_insn (gen_ashl (tmp, idx, GEN_INT (shift))); - /* lvsr v1,0,idx. */ - rtx pcvr = gen_reg_rtx (V16QImode); - emit_insn (gen_lvsr (pcvr, tmp)); - - /* lvsl v2,0,idx. */ - rtx pcvl = gen_reg_rtx (V16QImode); - emit_insn (gen_lvsl (pcvl, tmp)); + /* Generate one permutation control vector used for rotating the element + at to-insert position to element zero in target vector. lvsl is + used for big endianness while lvsr is used for little endianness: + lvs[lr] v1,0,idx. */ + rtx pcvr1 = gen_reg_rtx (V16QImode); + emit_insn (gen_pcvr1 (pcvr1, tmp)); rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0); + rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, + pcvr1); + emit_insn (perm1); - rtx permr - = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr); - emit_insn (permr); - + /* Insert val into element 0 of target vector. */ rs6000_expand_vector_set (target, val, const0_rtx); - rtx perml - = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl); - emit_insn (perml); + /* Rotate back with a reversed permutation control vector generated from: + lvs[rl] v2,0,idx. */ + rtx pcvr2 = gen_reg_rtx (V16QImode); + emit_insn (gen_pcvr2 (pcvr2, tmp)); + + rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, + pcvr2); + emit_insn (perm2); } /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX