Message ID | 9692d146-7894-4c49-a1b0-bcf02539cd9e@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [Patch-2,rs6000] Eliminate unnecessary byte swaps for duplicated constant vector store [PR113325] | expand |
Hi, Gently ping the patch. https://gcc.gnu.org/pipermail/gcc-patches/2024-January/643995.html Thanks Gui Haochen 在 2024/1/26 9:17, HAO CHEN GUI 写道: > Hi, > This patch creates an insn_and_split pattern which helps the duplicated > constant vector replace the source pseudo of store insn in fwprop pass. > Thus the store can be implemented by a single stxvd2x and it eliminates the > unnecessary byte swap insn on P8 LE. The test case shows the optimization. > > The patch depends on the first generic patch which uses insn cost in fwprop. > > Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no > regressions. > > Thanks > Gui Haochen > > > ChangeLog > rs6000: Eliminate unnecessary byte swaps for duplicated constant vector store > > gcc/ > PR target/113325 > * config/rs6000/predicates.md (duplicate_easy_altivec_constant): New. > * config/rs6000/vsx.md (vsx_stxvd2x4_le_const_<mode>): New. > > gcc/testsuite/ > PR target/113325 > * gcc.target/powerpc/pr113325.c: New. > > > patch.diff > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md > index ef7d3f214c4..8ab6db630b7 100644 > --- a/gcc/config/rs6000/predicates.md > +++ b/gcc/config/rs6000/predicates.md > @@ -759,6 +759,14 @@ (define_predicate "easy_vector_constant" > return false; > }) > > +;; Return 1 if it's a duplicated easy_altivec_constant. > +(define_predicate "duplicate_easy_altivec_constant" > + (and (match_code "const_vector") > + (match_test "easy_altivec_constant (op, mode)")) > +{ > + return const_vec_duplicate_p (op); > +}) > + > ;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF. > (define_predicate "easy_vector_constant_add_self" > (and (match_code "const_vector") > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index 26fa32829af..98e4be26f64 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -3362,6 +3362,29 @@ (define_insn "*vsx_stxvd2x4_le_<mode>" > "stxvd2x %x1,%y0" > [(set_attr "type" "vecstore")]) > > +(define_insn_and_split "vsx_stxvd2x4_le_const_<mode>" > + [(set (match_operand:VSX_W 0 "memory_operand" "=Z") > + (match_operand:VSX_W 1 "duplicate_easy_altivec_constant" "W"))] > + "!BYTES_BIG_ENDIAN > + && VECTOR_MEM_VSX_P (<MODE>mode) > + && !TARGET_P9_VECTOR" > + "#" > + "&& 1" > + [(set (match_dup 2) > + (match_dup 1)) > + (set (match_dup 0) > + (vec_select:VSX_W > + (match_dup 2) > + (parallel [(const_int 2) (const_int 3) > + (const_int 0) (const_int 1)])))] > +{ > + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) > + : operands[1]; > + > +} > + [(set_attr "type" "vecstore") > + (set_attr "length" "8")]) > + > (define_insn "*vsx_stxvd2x8_le_V8HI" > [(set (match_operand:V8HI 0 "memory_operand" "=Z") > (vec_select:V8HI > diff --git a/gcc/testsuite/gcc.target/powerpc/pr113325.c b/gcc/testsuite/gcc.target/powerpc/pr113325.c > new file mode 100644 > index 00000000000..dff68ac0a51 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr113325.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power8 -mvsx" } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-final { scan-assembler-not {\mxxpermdi\M} } } */ > + > +void* foo (void* s1) > +{ > + return __builtin_memset (s1, 0, 32); > +}
Hi Haochen, on 2024/1/26 09:17, HAO CHEN GUI wrote: > Hi, > This patch creates an insn_and_split pattern which helps the duplicated > constant vector replace the source pseudo of store insn in fwprop pass. > Thus the store can be implemented by a single stxvd2x and it eliminates the > unnecessary byte swap insn on P8 LE. The test case shows the optimization. > > The patch depends on the first generic patch which uses insn cost in fwprop. > > Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no > regressions. > > Thanks > Gui Haochen > > > ChangeLog > rs6000: Eliminate unnecessary byte swaps for duplicated constant vector store > > gcc/ > PR target/113325 > * config/rs6000/predicates.md (duplicate_easy_altivec_constant): New. > * config/rs6000/vsx.md (vsx_stxvd2x4_le_const_<mode>): New. > > gcc/testsuite/ > PR target/113325 > * gcc.target/powerpc/pr113325.c: New. > > > patch.diff > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md > index ef7d3f214c4..8ab6db630b7 100644 > --- a/gcc/config/rs6000/predicates.md > +++ b/gcc/config/rs6000/predicates.md > @@ -759,6 +759,14 @@ (define_predicate "easy_vector_constant" > return false; > }) > > +;; Return 1 if it's a duplicated easy_altivec_constant. > +(define_predicate "duplicate_easy_altivec_constant" > + (and (match_code "const_vector") > + (match_test "easy_altivec_constant (op, mode)")) > +{ > + return const_vec_duplicate_p (op); > +}) This predicate can be moved to its only use (define_insn part condition). The const_vector match_code check is redundant as const_vec_duplicate_p already checks that, I wonder if we really need easy_altivec_constant? Even if one vector constant doesn't meet easy_altivec_constant, but if it matches the desired duplicated pattern, it doesn't need the swapping either, no? > + > ;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF. > (define_predicate "easy_vector_constant_add_self" > (and (match_code "const_vector") > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index 26fa32829af..98e4be26f64 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -3362,6 +3362,29 @@ (define_insn "*vsx_stxvd2x4_le_<mode>" > "stxvd2x %x1,%y0" > [(set_attr "type" "vecstore")]) > > +(define_insn_and_split "vsx_stxvd2x4_le_const_<mode>" > + [(set (match_operand:VSX_W 0 "memory_operand" "=Z") > + (match_operand:VSX_W 1 "duplicate_easy_altivec_constant" "W"))] > + "!BYTES_BIG_ENDIAN > + && VECTOR_MEM_VSX_P (<MODE>mode) > + && !TARGET_P9_VECTOR" > + "#" > + "&& 1" > + [(set (match_dup 2) > + (match_dup 1)) > + (set (match_dup 0) > + (vec_select:VSX_W > + (match_dup 2) > + (parallel [(const_int 2) (const_int 3) > + (const_int 0) (const_int 1)])))] > +{ > + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) > + : operands[1]; > + > +} > + [(set_attr "type" "vecstore") > + (set_attr "length" "8")]) > + > (define_insn "*vsx_stxvd2x8_le_V8HI" > [(set (match_operand:V8HI 0 "memory_operand" "=Z") > (vec_select:V8HI > diff --git a/gcc/testsuite/gcc.target/powerpc/pr113325.c b/gcc/testsuite/gcc.target/powerpc/pr113325.c > new file mode 100644 > index 00000000000..dff68ac0a51 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr113325.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power8 -mvsx" } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ Nit: s/powerpc_vsx_ok/powerpc_vsx/ BR, Kewen > +/* { dg-final { scan-assembler-not {\mxxpermdi\M} } } */ > + > +void* foo (void* s1) > +{ > + return __builtin_memset (s1, 0, 32); > +}
Hi Kewen, 在 2024/6/5 17:00, Kewen.Lin 写道: > This predicate can be moved to its only use (define_insn part condition). > The const_vector match_code check is redundant as const_vec_duplicate_p > already checks that, I wonder if we really need easy_altivec_constant? > Even if one vector constant doesn't meet easy_altivec_constant, but if > it matches the desired duplicated pattern, it doesn't need the swapping > either, no? Thanks for your comments. I think we need easy_altivec_constant as the constant will be directly moved to a vector register after split. It might fail if it's not a easy alitvec constant? [(set (match_dup 2) (match_dup 1)) Thanks Gui Haochen
Hi, on 2024/6/6 09:43, HAO CHEN GUI wrote: > Hi Kewen, > > 在 2024/6/5 17:00, Kewen.Lin 写道: >> This predicate can be moved to its only use (define_insn part condition). >> The const_vector match_code check is redundant as const_vec_duplicate_p >> already checks that, I wonder if we really need easy_altivec_constant? >> Even if one vector constant doesn't meet easy_altivec_constant, but if >> it matches the desired duplicated pattern, it doesn't need the swapping >> either, no? > > Thanks for your comments. > I think we need easy_altivec_constant as the constant will be directly > moved to a vector register after split. It might fail if it's not a easy > alitvec constant? > > [(set (match_dup 2) > (match_dup 1)) For that case, can we move operand 1 to a pseudo first by emit_move_insn, then use that pseudo for the source of set? BR, Kewen
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index ef7d3f214c4..8ab6db630b7 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -759,6 +759,14 @@ (define_predicate "easy_vector_constant" return false; }) +;; Return 1 if it's a duplicated easy_altivec_constant. +(define_predicate "duplicate_easy_altivec_constant" + (and (match_code "const_vector") + (match_test "easy_altivec_constant (op, mode)")) +{ + return const_vec_duplicate_p (op); +}) + ;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF. (define_predicate "easy_vector_constant_add_self" (and (match_code "const_vector") diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 26fa32829af..98e4be26f64 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3362,6 +3362,29 @@ (define_insn "*vsx_stxvd2x4_le_<mode>" "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) +(define_insn_and_split "vsx_stxvd2x4_le_const_<mode>" + [(set (match_operand:VSX_W 0 "memory_operand" "=Z") + (match_operand:VSX_W 1 "duplicate_easy_altivec_constant" "W"))] + "!BYTES_BIG_ENDIAN + && VECTOR_MEM_VSX_P (<MODE>mode) + && !TARGET_P9_VECTOR" + "#" + "&& 1" + [(set (match_dup 2) + (match_dup 1)) + (set (match_dup 0) + (vec_select:VSX_W + (match_dup 2) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) + : operands[1]; + +} + [(set_attr "type" "vecstore") + (set_attr "length" "8")]) + (define_insn "*vsx_stxvd2x8_le_V8HI" [(set (match_operand:V8HI 0 "memory_operand" "=Z") (vec_select:V8HI diff --git a/gcc/testsuite/gcc.target/powerpc/pr113325.c b/gcc/testsuite/gcc.target/powerpc/pr113325.c new file mode 100644 index 00000000000..dff68ac0a51 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr113325.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8 -mvsx" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-final { scan-assembler-not {\mxxpermdi\M} } } */ + +void* foo (void* s1) +{ + return __builtin_memset (s1, 0, 32); +}