diff mbox series

[v4,01/12] Add TARGET_READ_MEMSET_VALUE/TARGET_GEN_MEMSET_VALUE

Message ID 20210518191646.3071005-2-hjl.tools@gmail.com
State New
Headers show
Series Allow TImode/OImode/XImode in op_by_pieces operations | expand

Commit Message

H.J. Lu May 18, 2021, 7:16 p.m. UTC
Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
target instructions to duplicate QImode value to TImode/OImode/XImode
value for memmset.

	PR middle-end/90773
	* builtins.c (builtin_memset_read_str): Call
	targetm.read_memset_value.
	(builtin_memset_gen_str): Call targetm.gen_memset_value.
	* target.def (read_memset_value): New hook.
	(gen_memset_value): Likewise.
	* targhooks.c: Inclue "builtins.h".
	(default_read_memset_value): New function.
	(default_gen_memset_value): Likewise.
	* targhooks.h (default_read_memset_value): New prototype.
	(default_gen_memset_value): Likewise.
	* doc/tm.texi.in: Add TARGET_READ_MEMSET_VALUE and
	TARGET_GEN_MEMSET_VALUE hooks.
	* doc/tm.texi: Regenerated.
---
 gcc/builtins.c     | 47 ++++----------------------------------
 gcc/doc/tm.texi    | 16 +++++++++++++
 gcc/doc/tm.texi.in |  4 ++++
 gcc/target.def     | 20 +++++++++++++++++
 gcc/targhooks.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/targhooks.h    |  4 ++++
 6 files changed, 104 insertions(+), 43 deletions(-)

Comments

Richard Biener May 19, 2021, 9:25 a.m. UTC | #1
On Tue, May 18, 2021 at 9:16 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
> target instructions to duplicate QImode value to TImode/OImode/XImode
> value for memmset.
>
>         PR middle-end/90773
>         * builtins.c (builtin_memset_read_str): Call
>         targetm.read_memset_value.
>         (builtin_memset_gen_str): Call targetm.gen_memset_value.
>         * target.def (read_memset_value): New hook.
>         (gen_memset_value): Likewise.
>         * targhooks.c: Inclue "builtins.h".
>         (default_read_memset_value): New function.
>         (default_gen_memset_value): Likewise.
>         * targhooks.h (default_read_memset_value): New prototype.
>         (default_gen_memset_value): Likewise.
>         * doc/tm.texi.in: Add TARGET_READ_MEMSET_VALUE and
>         TARGET_GEN_MEMSET_VALUE hooks.
>         * doc/tm.texi: Regenerated.
> ---
>  gcc/builtins.c     | 47 ++++----------------------------------
>  gcc/doc/tm.texi    | 16 +++++++++++++
>  gcc/doc/tm.texi.in |  4 ++++
>  gcc/target.def     | 20 +++++++++++++++++
>  gcc/targhooks.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++++
>  gcc/targhooks.h    |  4 ++++
>  6 files changed, 104 insertions(+), 43 deletions(-)
>
> diff --git a/gcc/builtins.c b/gcc/builtins.c
> index e1b284846b1..f78a36478ef 100644
> --- a/gcc/builtins.c
> +++ b/gcc/builtins.c
> @@ -6584,24 +6584,11 @@ expand_builtin_strncpy (tree exp, rtx target)
>     previous iteration.  */
>
>  rtx
> -builtin_memset_read_str (void *data, void *prevp,
> +builtin_memset_read_str (void *data, void *prev,
>                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
>                          scalar_int_mode mode)
>  {
> -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> -  if (prev != nullptr && prev->data != nullptr)
> -    {
> -      /* Use the previous data in the same mode.  */
> -      if (prev->mode == mode)
> -       return prev->data;
> -    }
> -
> -  const char *c = (const char *) data;
> -  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
> -
> -  memset (p, *c, GET_MODE_SIZE (mode));
> -
> -  return c_readstr (p, mode);
> +  return targetm.read_memset_value ((const char *) data, prev, mode);
>  }
>
>  /* Callback routine for store_by_pieces.  Return the RTL of a register
> @@ -6611,37 +6598,11 @@ builtin_memset_read_str (void *data, void *prevp,
>     nullptr, it has the RTL info from the previous iteration.  */
>
>  static rtx
> -builtin_memset_gen_str (void *data, void *prevp,
> +builtin_memset_gen_str (void *data, void *prev,
>                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
>                         scalar_int_mode mode)
>  {
> -  rtx target, coeff;
> -  size_t size;
> -  char *p;
> -
> -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> -  if (prev != nullptr && prev->data != nullptr)
> -    {
> -      /* Use the previous data in the same mode.  */
> -      if (prev->mode == mode)
> -       return prev->data;
> -
> -      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
> -      if (target != nullptr)
> -       return target;
> -    }
> -
> -  size = GET_MODE_SIZE (mode);
> -  if (size == 1)
> -    return (rtx) data;
> -
> -  p = XALLOCAVEC (char, size);
> -  memset (p, 1, size);
> -  coeff = c_readstr (p, mode);
> -
> -  target = convert_to_mode (mode, (rtx) data, 1);
> -  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> -  return force_reg (mode, target);
> +  return targetm.gen_memset_value ((rtx) data, prev, mode);
>  }
>
>  /* Expand expression EXP, which is a call to the memset builtin.  Return
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 85ea9395560..51385044e76 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11868,6 +11868,22 @@ This function prepares to emit a conditional comparison within a sequence
>   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
>  @end deftypefn
>
> +@deftypefn {Target Hook} rtx TARGET_READ_MEMSET_VALUE (const char *@var{c}, void *@var{prev}, scalar_int_mode @var{mode})
> +This function returns the RTL of a constant integer corresponding to
> +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn
> +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains
> +the RTL information from the previous interation.
> +@end deftypefn
> +
> +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, void *@var{prev}, scalar_int_mode @var{mode})
> +This function returns the RTL of a register containing
> +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
> +char value given in the RTL register @var{data}.  For example, if
> +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
> +If @var{PREV} is not @samp{nullptr}, it is the RTL information from
> +the previous iteration.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
>  This target hook returns a new value for the number of times @var{loop}
>  should be unrolled. The parameter @var{nunroll} is the number of times
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index d8e3de14af1..8d4c3949fbf 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -7956,6 +7956,10 @@ lists.
>
>  @hook TARGET_GEN_CCMP_NEXT
>
> +@hook TARGET_READ_MEMSET_VALUE
> +
> +@hook TARGET_GEN_MEMSET_VALUE
> +
>  @hook TARGET_LOOP_UNROLL_ADJUST
>
>  @defmac POWI_MAX_MULTS
> diff --git a/gcc/target.def b/gcc/target.def
> index bbaf6b4f3a0..c9aca40fa88 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -2694,6 +2694,26 @@ DEFHOOK
>   rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
>   NULL)
>
> +DEFHOOK
> +(read_memset_value,
> + "This function returns the RTL of a constant integer corresponding to\n\
> +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn\n\
> +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\

where is 'str' defined?  I can't really tell what's the difference
from read_memset_value
and gen_memset_value.

Somehow I feel that an optab for the "splat" operation similar
to vec_duplicate might be a better way to expose this - of course
that doesn't handle the "prev" thing.

So how's this the right point of abstraction to the target?

> +the RTL information from the previous interation.",
> + rtx, (const char *c, void *prev, scalar_int_mode mode),
> + default_read_memset_value)
> +
> +DEFHOOK
> +(gen_memset_value,
> + "This function returns the RTL of a register containing\n\
> +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned\n\
> +char value given in the RTL register @var{data}.  For example, if\n\
> +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.\n\
> +If @var{PREV} is not @samp{nullptr}, it is the RTL information from\n\
> +the previous iteration.",
> + rtx, (rtx data, void *prev, scalar_int_mode mode),
> + default_gen_memset_value)
> +
>  /* Return a new value for loop unroll size.  */
>  DEFHOOK
>  (loop_unroll_adjust,
> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
> index 1947ef26fd6..b55e6ec6756 100644
> --- a/gcc/targhooks.c
> +++ b/gcc/targhooks.c
> @@ -90,6 +90,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "attribs.h"
>  #include "asan.h"
>  #include "emit-rtl.h"
> +#include "builtins.h"
>
>  bool
>  default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
> @@ -2627,4 +2628,59 @@ default_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
>    return untagged_base;
>  }
>
> +/* Default implementation of TARGET_READ_MEMSET_VALUE.  */
> +
> +rtx
> +default_read_memset_value (const char *c, void *prevp,
> +                          scalar_int_mode mode)
> +{
> +  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> +  if (prev != nullptr && prev->data != nullptr)
> +    {
> +      /* Use the previous data in the same mode.  */
> +      if (prev->mode == mode)
> +       return prev->data;
> +    }
> +
> +  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
> +
> +  memset (p, *c, GET_MODE_SIZE (mode));
> +
> +  return c_readstr (p, mode);
> +}
> +
> +/* Default implementation of TARGET_GEN_MEMSET_VALUE.  */
> +
> +rtx
> +default_gen_memset_value (rtx data, void *prevp, scalar_int_mode mode)
> +{
> +  rtx target, coeff;
> +  size_t size;
> +  char *p;
> +
> +  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> +  if (prev != nullptr && prev->data != nullptr)
> +    {
> +      /* Use the previous data in the same mode.  */
> +      if (prev->mode == mode)
> +       return prev->data;
> +
> +      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
> +      if (target != nullptr)
> +       return target;
> +    }
> +
> +  size = GET_MODE_SIZE (mode);
> +  if (size == 1)
> +    return data;
> +
> +  p = XALLOCAVEC (char, size);
> +  memset (p, 1, size);
> +  coeff = c_readstr (p, mode);
> +
> +  target = convert_to_mode (mode, data, 1);
> +  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> +  return force_reg (mode, target);
> +}
> +
>  #include "gt-targhooks.h"
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index b537038c0aa..3c00927e196 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -300,4 +300,8 @@ extern rtx default_memtag_set_tag (rtx, rtx, rtx);
>  extern rtx default_memtag_extract_tag (rtx, rtx);
>  extern rtx default_memtag_untagged_pointer (rtx, rtx);
>
> +extern rtx default_read_memset_value (const char *, void *,
> +                                     scalar_int_mode);
> +extern rtx default_gen_memset_value (rtx, void *, scalar_int_mode);
> +
>  #endif /* GCC_TARGHOOKS_H */
> --
> 2.31.1
>
H.J. Lu May 19, 2021, 12:55 p.m. UTC | #2
On Wed, May 19, 2021 at 2:25 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Tue, May 18, 2021 at 9:16 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
> > target instructions to duplicate QImode value to TImode/OImode/XImode
> > value for memmset.
> >
> >         PR middle-end/90773
> >         * builtins.c (builtin_memset_read_str): Call
> >         targetm.read_memset_value.
> >         (builtin_memset_gen_str): Call targetm.gen_memset_value.
> >         * target.def (read_memset_value): New hook.
> >         (gen_memset_value): Likewise.
> >         * targhooks.c: Inclue "builtins.h".
> >         (default_read_memset_value): New function.
> >         (default_gen_memset_value): Likewise.
> >         * targhooks.h (default_read_memset_value): New prototype.
> >         (default_gen_memset_value): Likewise.
> >         * doc/tm.texi.in: Add TARGET_READ_MEMSET_VALUE and
> >         TARGET_GEN_MEMSET_VALUE hooks.
> >         * doc/tm.texi: Regenerated.
> > ---
> >  gcc/builtins.c     | 47 ++++----------------------------------
> >  gcc/doc/tm.texi    | 16 +++++++++++++
> >  gcc/doc/tm.texi.in |  4 ++++
> >  gcc/target.def     | 20 +++++++++++++++++
> >  gcc/targhooks.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++++
> >  gcc/targhooks.h    |  4 ++++
> >  6 files changed, 104 insertions(+), 43 deletions(-)
> >
> > diff --git a/gcc/builtins.c b/gcc/builtins.c
> > index e1b284846b1..f78a36478ef 100644
> > --- a/gcc/builtins.c
> > +++ b/gcc/builtins.c
> > @@ -6584,24 +6584,11 @@ expand_builtin_strncpy (tree exp, rtx target)
> >     previous iteration.  */
> >
> >  rtx
> > -builtin_memset_read_str (void *data, void *prevp,
> > +builtin_memset_read_str (void *data, void *prev,
> >                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> >                          scalar_int_mode mode)
> >  {
> > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > -  if (prev != nullptr && prev->data != nullptr)
> > -    {
> > -      /* Use the previous data in the same mode.  */
> > -      if (prev->mode == mode)
> > -       return prev->data;
> > -    }
> > -
> > -  const char *c = (const char *) data;
> > -  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
> > -
> > -  memset (p, *c, GET_MODE_SIZE (mode));
> > -
> > -  return c_readstr (p, mode);
> > +  return targetm.read_memset_value ((const char *) data, prev, mode);
> >  }
> >
> >  /* Callback routine for store_by_pieces.  Return the RTL of a register
> > @@ -6611,37 +6598,11 @@ builtin_memset_read_str (void *data, void *prevp,
> >     nullptr, it has the RTL info from the previous iteration.  */
> >
> >  static rtx
> > -builtin_memset_gen_str (void *data, void *prevp,
> > +builtin_memset_gen_str (void *data, void *prev,
> >                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> >                         scalar_int_mode mode)
> >  {
> > -  rtx target, coeff;
> > -  size_t size;
> > -  char *p;
> > -
> > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > -  if (prev != nullptr && prev->data != nullptr)
> > -    {
> > -      /* Use the previous data in the same mode.  */
> > -      if (prev->mode == mode)
> > -       return prev->data;
> > -
> > -      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
> > -      if (target != nullptr)
> > -       return target;
> > -    }
> > -
> > -  size = GET_MODE_SIZE (mode);
> > -  if (size == 1)
> > -    return (rtx) data;
> > -
> > -  p = XALLOCAVEC (char, size);
> > -  memset (p, 1, size);
> > -  coeff = c_readstr (p, mode);
> > -
> > -  target = convert_to_mode (mode, (rtx) data, 1);
> > -  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> > -  return force_reg (mode, target);
> > +  return targetm.gen_memset_value ((rtx) data, prev, mode);
> >  }
> >
> >  /* Expand expression EXP, which is a call to the memset builtin.  Return
> > diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> > index 85ea9395560..51385044e76 100644
> > --- a/gcc/doc/tm.texi
> > +++ b/gcc/doc/tm.texi
> > @@ -11868,6 +11868,22 @@ This function prepares to emit a conditional comparison within a sequence
> >   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
> >  @end deftypefn
> >
> > +@deftypefn {Target Hook} rtx TARGET_READ_MEMSET_VALUE (const char *@var{c}, void *@var{prev}, scalar_int_mode @var{mode})
> > +This function returns the RTL of a constant integer corresponding to
> > +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn
> > +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains
> > +the RTL information from the previous interation.
> > +@end deftypefn
> > +
> > +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, void *@var{prev}, scalar_int_mode @var{mode})
> > +This function returns the RTL of a register containing
> > +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
> > +char value given in the RTL register @var{data}.  For example, if
> > +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
> > +If @var{PREV} is not @samp{nullptr}, it is the RTL information from
> > +the previous iteration.
> > +@end deftypefn
> > +
> >  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
> >  This target hook returns a new value for the number of times @var{loop}
> >  should be unrolled. The parameter @var{nunroll} is the number of times
> > diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> > index d8e3de14af1..8d4c3949fbf 100644
> > --- a/gcc/doc/tm.texi.in
> > +++ b/gcc/doc/tm.texi.in
> > @@ -7956,6 +7956,10 @@ lists.
> >
> >  @hook TARGET_GEN_CCMP_NEXT
> >
> > +@hook TARGET_READ_MEMSET_VALUE
> > +
> > +@hook TARGET_GEN_MEMSET_VALUE
> > +
> >  @hook TARGET_LOOP_UNROLL_ADJUST
> >
> >  @defmac POWI_MAX_MULTS
> > diff --git a/gcc/target.def b/gcc/target.def
> > index bbaf6b4f3a0..c9aca40fa88 100644
> > --- a/gcc/target.def
> > +++ b/gcc/target.def
> > @@ -2694,6 +2694,26 @@ DEFHOOK
> >   rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
> >   NULL)
> >
> > +DEFHOOK
> > +(read_memset_value,
> > + "This function returns the RTL of a constant integer corresponding to\n\
> > +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn\n\
> > +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
>
> where is 'str' defined?  I can't really tell what's the difference

Fixed with

diff --git a/gcc/target.def b/gcc/target.def
index c9aca40fa88..4c3a5fcc634 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2699,8 +2699,8 @@ DEFHOOK
  "This function returns the RTL of a constant integer corresponding to\n\
 target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the string\n\
 constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
-the RTL information from the previous interation.",
- rtx, (const char *c, void *prev, scalar_int_mode mode),
+the RTL information from the previous iteration.",
+ rtx, (const char *str, void *prev, scalar_int_mode mode),
  default_read_memset_value)

 DEFHOOK

> from read_memset_value
> and gen_memset_value.

The difference is that input of read_memset_value is a string constant
like "123" and input of gen_memset_value is an RTL register.

> Somehow I feel that an optab for the "splat" operation similar
> to vec_duplicate might be a better way to expose this - of course
> that doesn't handle the "prev" thing.

The x86 backend has ix86_expand_vector_init_duplicate () to
broadcast QImode to TImode/OImode/XImode:

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

bool
ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
                                   rtx target, rtx val)

> So how's this the right point of abstraction to the target?

I can add 2 target hooks, one for scratch register and one for
broadcasting QImode to TImode/OImode/XImode.   Then I can
move x86 codes to the middle-end.

> > +the RTL information from the previous interation.",
> > + rtx, (const char *c, void *prev, scalar_int_mode mode),
> > + default_read_memset_value)
> > +
> > +DEFHOOK
> > +(gen_memset_value,
> > + "This function returns the RTL of a register containing\n\
> > +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned\n\
> > +char value given in the RTL register @var{data}.  For example, if\n\
> > +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.\n\
> > +If @var{PREV} is not @samp{nullptr}, it is the RTL information from\n\
> > +the previous iteration.",
> > + rtx, (rtx data, void *prev, scalar_int_mode mode),
> > + default_gen_memset_value)
> > +
> >  /* Return a new value for loop unroll size.  */
> >  DEFHOOK
> >  (loop_unroll_adjust,
> > diff --git a/gcc/targhooks.c b/gcc/targhooks.c
> > index 1947ef26fd6..b55e6ec6756 100644
> > --- a/gcc/targhooks.c
> > +++ b/gcc/targhooks.c
> > @@ -90,6 +90,7 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "attribs.h"
> >  #include "asan.h"
> >  #include "emit-rtl.h"
> > +#include "builtins.h"
> >
> >  bool
> >  default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
> > @@ -2627,4 +2628,59 @@ default_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
> >    return untagged_base;
> >  }
> >
> > +/* Default implementation of TARGET_READ_MEMSET_VALUE.  */
> > +
> > +rtx
> > +default_read_memset_value (const char *c, void *prevp,
> > +                          scalar_int_mode mode)
> > +{
> > +  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > +  if (prev != nullptr && prev->data != nullptr)
> > +    {
> > +      /* Use the previous data in the same mode.  */
> > +      if (prev->mode == mode)
> > +       return prev->data;
> > +    }
> > +
> > +  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
> > +
> > +  memset (p, *c, GET_MODE_SIZE (mode));
> > +
> > +  return c_readstr (p, mode);
> > +}
> > +
> > +/* Default implementation of TARGET_GEN_MEMSET_VALUE.  */
> > +
> > +rtx
> > +default_gen_memset_value (rtx data, void *prevp, scalar_int_mode mode)
> > +{
> > +  rtx target, coeff;
> > +  size_t size;
> > +  char *p;
> > +
> > +  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > +  if (prev != nullptr && prev->data != nullptr)
> > +    {
> > +      /* Use the previous data in the same mode.  */
> > +      if (prev->mode == mode)
> > +       return prev->data;
> > +
> > +      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
> > +      if (target != nullptr)
> > +       return target;
> > +    }
> > +
> > +  size = GET_MODE_SIZE (mode);
> > +  if (size == 1)
> > +    return data;
> > +
> > +  p = XALLOCAVEC (char, size);
> > +  memset (p, 1, size);
> > +  coeff = c_readstr (p, mode);
> > +
> > +  target = convert_to_mode (mode, data, 1);
> > +  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> > +  return force_reg (mode, target);
> > +}
> > +
> >  #include "gt-targhooks.h"
> > diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> > index b537038c0aa..3c00927e196 100644
> > --- a/gcc/targhooks.h
> > +++ b/gcc/targhooks.h
> > @@ -300,4 +300,8 @@ extern rtx default_memtag_set_tag (rtx, rtx, rtx);
> >  extern rtx default_memtag_extract_tag (rtx, rtx);
> >  extern rtx default_memtag_untagged_pointer (rtx, rtx);
> >
> > +extern rtx default_read_memset_value (const char *, void *,
> > +                                     scalar_int_mode);
> > +extern rtx default_gen_memset_value (rtx, void *, scalar_int_mode);
> > +
> >  #endif /* GCC_TARGHOOKS_H */
> > --
> > 2.31.1
> >
H.J. Lu May 20, 2021, 8:49 p.m. UTC | #3
On Wed, May 19, 2021 at 5:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, May 19, 2021 at 2:25 AM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Tue, May 18, 2021 at 9:16 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
> > > target instructions to duplicate QImode value to TImode/OImode/XImode
> > > value for memmset.
> > >
> > >         PR middle-end/90773
> > >         * builtins.c (builtin_memset_read_str): Call
> > >         targetm.read_memset_value.
> > >         (builtin_memset_gen_str): Call targetm.gen_memset_value.
> > >         * target.def (read_memset_value): New hook.
> > >         (gen_memset_value): Likewise.
> > >         * targhooks.c: Inclue "builtins.h".
> > >         (default_read_memset_value): New function.
> > >         (default_gen_memset_value): Likewise.
> > >         * targhooks.h (default_read_memset_value): New prototype.
> > >         (default_gen_memset_value): Likewise.
> > >         * doc/tm.texi.in: Add TARGET_READ_MEMSET_VALUE and
> > >         TARGET_GEN_MEMSET_VALUE hooks.
> > >         * doc/tm.texi: Regenerated.
> > > ---
> > >  gcc/builtins.c     | 47 ++++----------------------------------
> > >  gcc/doc/tm.texi    | 16 +++++++++++++
> > >  gcc/doc/tm.texi.in |  4 ++++
> > >  gcc/target.def     | 20 +++++++++++++++++
> > >  gcc/targhooks.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++++
> > >  gcc/targhooks.h    |  4 ++++
> > >  6 files changed, 104 insertions(+), 43 deletions(-)
> > >
> > > diff --git a/gcc/builtins.c b/gcc/builtins.c
> > > index e1b284846b1..f78a36478ef 100644
> > > --- a/gcc/builtins.c
> > > +++ b/gcc/builtins.c
> > > @@ -6584,24 +6584,11 @@ expand_builtin_strncpy (tree exp, rtx target)
> > >     previous iteration.  */
> > >
> > >  rtx
> > > -builtin_memset_read_str (void *data, void *prevp,
> > > +builtin_memset_read_str (void *data, void *prev,
> > >                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > >                          scalar_int_mode mode)
> > >  {
> > > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > > -  if (prev != nullptr && prev->data != nullptr)
> > > -    {
> > > -      /* Use the previous data in the same mode.  */
> > > -      if (prev->mode == mode)
> > > -       return prev->data;
> > > -    }
> > > -
> > > -  const char *c = (const char *) data;
> > > -  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
> > > -
> > > -  memset (p, *c, GET_MODE_SIZE (mode));
> > > -
> > > -  return c_readstr (p, mode);
> > > +  return targetm.read_memset_value ((const char *) data, prev, mode);
> > >  }
> > >
> > >  /* Callback routine for store_by_pieces.  Return the RTL of a register
> > > @@ -6611,37 +6598,11 @@ builtin_memset_read_str (void *data, void *prevp,
> > >     nullptr, it has the RTL info from the previous iteration.  */
> > >
> > >  static rtx
> > > -builtin_memset_gen_str (void *data, void *prevp,
> > > +builtin_memset_gen_str (void *data, void *prev,
> > >                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > >                         scalar_int_mode mode)
> > >  {
> > > -  rtx target, coeff;
> > > -  size_t size;
> > > -  char *p;
> > > -
> > > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > > -  if (prev != nullptr && prev->data != nullptr)
> > > -    {
> > > -      /* Use the previous data in the same mode.  */
> > > -      if (prev->mode == mode)
> > > -       return prev->data;
> > > -
> > > -      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
> > > -      if (target != nullptr)
> > > -       return target;
> > > -    }
> > > -
> > > -  size = GET_MODE_SIZE (mode);
> > > -  if (size == 1)
> > > -    return (rtx) data;
> > > -
> > > -  p = XALLOCAVEC (char, size);
> > > -  memset (p, 1, size);
> > > -  coeff = c_readstr (p, mode);
> > > -
> > > -  target = convert_to_mode (mode, (rtx) data, 1);
> > > -  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> > > -  return force_reg (mode, target);
> > > +  return targetm.gen_memset_value ((rtx) data, prev, mode);
> > >  }
> > >
> > >  /* Expand expression EXP, which is a call to the memset builtin.  Return
> > > diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> > > index 85ea9395560..51385044e76 100644
> > > --- a/gcc/doc/tm.texi
> > > +++ b/gcc/doc/tm.texi
> > > @@ -11868,6 +11868,22 @@ This function prepares to emit a conditional comparison within a sequence
> > >   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
> > >  @end deftypefn
> > >
> > > +@deftypefn {Target Hook} rtx TARGET_READ_MEMSET_VALUE (const char *@var{c}, void *@var{prev}, scalar_int_mode @var{mode})
> > > +This function returns the RTL of a constant integer corresponding to
> > > +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn
> > > +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains
> > > +the RTL information from the previous interation.
> > > +@end deftypefn
> > > +
> > > +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, void *@var{prev}, scalar_int_mode @var{mode})
> > > +This function returns the RTL of a register containing
> > > +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
> > > +char value given in the RTL register @var{data}.  For example, if
> > > +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
> > > +If @var{PREV} is not @samp{nullptr}, it is the RTL information from
> > > +the previous iteration.
> > > +@end deftypefn
> > > +
> > >  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
> > >  This target hook returns a new value for the number of times @var{loop}
> > >  should be unrolled. The parameter @var{nunroll} is the number of times
> > > diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> > > index d8e3de14af1..8d4c3949fbf 100644
> > > --- a/gcc/doc/tm.texi.in
> > > +++ b/gcc/doc/tm.texi.in
> > > @@ -7956,6 +7956,10 @@ lists.
> > >
> > >  @hook TARGET_GEN_CCMP_NEXT
> > >
> > > +@hook TARGET_READ_MEMSET_VALUE
> > > +
> > > +@hook TARGET_GEN_MEMSET_VALUE
> > > +
> > >  @hook TARGET_LOOP_UNROLL_ADJUST
> > >
> > >  @defmac POWI_MAX_MULTS
> > > diff --git a/gcc/target.def b/gcc/target.def
> > > index bbaf6b4f3a0..c9aca40fa88 100644
> > > --- a/gcc/target.def
> > > +++ b/gcc/target.def
> > > @@ -2694,6 +2694,26 @@ DEFHOOK
> > >   rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
> > >   NULL)
> > >
> > > +DEFHOOK
> > > +(read_memset_value,
> > > + "This function returns the RTL of a constant integer corresponding to\n\
> > > +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn\n\
> > > +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
> >
> > where is 'str' defined?  I can't really tell what's the difference
>
> Fixed with
>
> diff --git a/gcc/target.def b/gcc/target.def
> index c9aca40fa88..4c3a5fcc634 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -2699,8 +2699,8 @@ DEFHOOK
>   "This function returns the RTL of a constant integer corresponding to\n\
>  target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the string\n\
>  constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
> -the RTL information from the previous interation.",
> - rtx, (const char *c, void *prev, scalar_int_mode mode),
> +the RTL information from the previous iteration.",
> + rtx, (const char *str, void *prev, scalar_int_mode mode),
>   default_read_memset_value)
>
>  DEFHOOK
>
> > from read_memset_value
> > and gen_memset_value.
>
> The difference is that input of read_memset_value is a string constant
> like "123" and input of gen_memset_value is an RTL register.
>
> > Somehow I feel that an optab for the "splat" operation similar
> > to vec_duplicate might be a better way to expose this - of course
> > that doesn't handle the "prev" thing.
>
> The x86 backend has ix86_expand_vector_init_duplicate () to
> broadcast QImode to TImode/OImode/XImode:
>
> /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
>    with all elements equal to VAR.  Return true if successful.  */
>
> bool
> ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
>                                    rtx target, rtx val)
>
> > So how's this the right point of abstraction to the target?
>
> I can add 2 target hooks, one for scratch register and one for
> broadcasting QImode to TImode/OImode/XImode.   Then I can
> move x86 codes to the middle-end.
>

Here is the patch to add 3 target hooks:

 -- Target Hook: rtx TARGET_READ_MEMSET_VALUE (const char *C,
          scalar_int_mode MODE)
     This function returns the RTL of a constant integer corresponding
     to target reading 'GET_MODE_SIZE (MODE)' bytes from the string
     constant C.

 -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx DATA, scalar_int_mode
          MODE)
     This function returns the RTL of a register containing
     'GET_MODE_SIZE (MODE)' consecutive copies of the unsigned char
     value given in the RTL register DATA.  For example, if MODE is 4
     bytes wide, return the RTL for 0x01010101*DATA.

 -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE_FROM_PREV (void *PREV,
          scalar_int_mode MODE)
     This function returns the RTL of a register in MODE generated from
     PREV in the previous iteration.

with

/* Return the RTL of a register in MODE generated from PREV in the
   previous iteration.  */

static rtx
gen_memset_value_from_prev (void *prevp, scalar_int_mode mode)
{
  by_pieces_prev *prev = (by_pieces_prev *) prevp;
  rtx value;
  if (prev != nullptr && prev->data != nullptr)
    {
      /* Use the previous data in the same mode.  */
      if (prev->mode == mode)
        return prev->data;

      value = targetm.gen_memset_value_from_prev (prevp, mode);
    }
  else
    value = nullptr;
  return value;
}

/* Callback routine for store_by_pieces.  Read GET_MODE_BITSIZE (MODE)
   bytes from constant string DATA + OFFSET and return it as target
   constant.  If PREV isn't nullptr, it has the RTL info from the
   previous iteration.  */

rtx
builtin_memset_read_str (void *data, void *prev,
                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
                         scalar_int_mode mode)
{
  const char *str = (const char *) data;

  /* Don't use the previous value if size is 1.  */
  if (GET_MODE_SIZE (mode) == 1)
    return default_read_memset_value (str, mode);

  rtx value = gen_memset_value_from_prev (prev, mode);
  if (value)
    return value;

  return targetm.read_memset_value (str, mode);
}

/* Callback routine for store_by_pieces.  Return the RTL of a register
   containing GET_MODE_SIZE (MODE) consecutive copies of the unsigned
   char value given in the RTL register data.  For example, if mode is
   4 bytes wide, return the RTL for 0x01010101*data.  If PREV isn't
   nullptr, it has the RTL info from the previous iteration.  */

static rtx
builtin_memset_gen_str (void *datap, void *prev,
                        HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
                        scalar_int_mode mode)
{
  rtx data = (rtx) datap;

  /* Don't use the previous value if size is 1.  */
  if (GET_MODE_SIZE (mode) == 1)
    return data;

  rtx value = gen_memset_value_from_prev (prev, mode);
  if (value)
    return value;

  return targetm.gen_memset_value (data, mode);
}
Bernd Edlinger May 21, 2021, 5:42 a.m. UTC | #4
On 5/20/21 10:49 PM, H.J. Lu wrote:
> On Wed, May 19, 2021 at 5:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>> On Wed, May 19, 2021 at 2:25 AM Richard Biener
>> <richard.guenther@gmail.com> wrote:
>>>
>>> On Tue, May 18, 2021 at 9:16 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>
>>>> Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
>>>> target instructions to duplicate QImode value to TImode/OImode/XImode
>>>> value for memmset.
>>>>
>>>>         PR middle-end/90773
>>>>         * builtins.c (builtin_memset_read_str): Call
>>>>         targetm.read_memset_value.
>>>>         (builtin_memset_gen_str): Call targetm.gen_memset_value.
>>>>         * target.def (read_memset_value): New hook.
>>>>         (gen_memset_value): Likewise.
>>>>         * targhooks.c: Inclue "builtins.h".
>>>>         (default_read_memset_value): New function.
>>>>         (default_gen_memset_value): Likewise.
>>>>         * targhooks.h (default_read_memset_value): New prototype.
>>>>         (default_gen_memset_value): Likewise.
>>>>         * doc/tm.texi.in: Add TARGET_READ_MEMSET_VALUE and
>>>>         TARGET_GEN_MEMSET_VALUE hooks.
>>>>         * doc/tm.texi: Regenerated.
>>>> ---
>>>>  gcc/builtins.c     | 47 ++++----------------------------------
>>>>  gcc/doc/tm.texi    | 16 +++++++++++++
>>>>  gcc/doc/tm.texi.in |  4 ++++
>>>>  gcc/target.def     | 20 +++++++++++++++++
>>>>  gcc/targhooks.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++++
>>>>  gcc/targhooks.h    |  4 ++++
>>>>  6 files changed, 104 insertions(+), 43 deletions(-)
>>>>
>>>> diff --git a/gcc/builtins.c b/gcc/builtins.c
>>>> index e1b284846b1..f78a36478ef 100644
>>>> --- a/gcc/builtins.c
>>>> +++ b/gcc/builtins.c
>>>> @@ -6584,24 +6584,11 @@ expand_builtin_strncpy (tree exp, rtx target)
>>>>     previous iteration.  */
>>>>
>>>>  rtx
>>>> -builtin_memset_read_str (void *data, void *prevp,
>>>> +builtin_memset_read_str (void *data, void *prev,
>>>>                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
>>>>                          scalar_int_mode mode)
>>>>  {
>>>> -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
>>>> -  if (prev != nullptr && prev->data != nullptr)
>>>> -    {
>>>> -      /* Use the previous data in the same mode.  */
>>>> -      if (prev->mode == mode)
>>>> -       return prev->data;
>>>> -    }
>>>> -
>>>> -  const char *c = (const char *) data;
>>>> -  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
>>>> -
>>>> -  memset (p, *c, GET_MODE_SIZE (mode));
>>>> -
>>>> -  return c_readstr (p, mode);
>>>> +  return targetm.read_memset_value ((const char *) data, prev, mode);
>>>>  }
>>>>
>>>>  /* Callback routine for store_by_pieces.  Return the RTL of a register
>>>> @@ -6611,37 +6598,11 @@ builtin_memset_read_str (void *data, void *prevp,
>>>>     nullptr, it has the RTL info from the previous iteration.  */
>>>>
>>>>  static rtx
>>>> -builtin_memset_gen_str (void *data, void *prevp,
>>>> +builtin_memset_gen_str (void *data, void *prev,
>>>>                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
>>>>                         scalar_int_mode mode)
>>>>  {
>>>> -  rtx target, coeff;
>>>> -  size_t size;
>>>> -  char *p;
>>>> -
>>>> -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
>>>> -  if (prev != nullptr && prev->data != nullptr)
>>>> -    {
>>>> -      /* Use the previous data in the same mode.  */
>>>> -      if (prev->mode == mode)
>>>> -       return prev->data;
>>>> -
>>>> -      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
>>>> -      if (target != nullptr)
>>>> -       return target;
>>>> -    }
>>>> -
>>>> -  size = GET_MODE_SIZE (mode);
>>>> -  if (size == 1)
>>>> -    return (rtx) data;
>>>> -
>>>> -  p = XALLOCAVEC (char, size);
>>>> -  memset (p, 1, size);
>>>> -  coeff = c_readstr (p, mode);
>>>> -
>>>> -  target = convert_to_mode (mode, (rtx) data, 1);
>>>> -  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
>>>> -  return force_reg (mode, target);
>>>> +  return targetm.gen_memset_value ((rtx) data, prev, mode);
>>>>  }
>>>>
>>>>  /* Expand expression EXP, which is a call to the memset builtin.  Return
>>>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
>>>> index 85ea9395560..51385044e76 100644
>>>> --- a/gcc/doc/tm.texi
>>>> +++ b/gcc/doc/tm.texi
>>>> @@ -11868,6 +11868,22 @@ This function prepares to emit a conditional comparison within a sequence
>>>>   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
>>>>  @end deftypefn
>>>>
>>>> +@deftypefn {Target Hook} rtx TARGET_READ_MEMSET_VALUE (const char *@var{c}, void *@var{prev}, scalar_int_mode @var{mode})
>>>> +This function returns the RTL of a constant integer corresponding to
>>>> +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn
>>>> +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains
>>>> +the RTL information from the previous interation.
>>>> +@end deftypefn
>>>> +
>>>> +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, void *@var{prev}, scalar_int_mode @var{mode})
>>>> +This function returns the RTL of a register containing
>>>> +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
>>>> +char value given in the RTL register @var{data}.  For example, if
>>>> +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
>>>> +If @var{PREV} is not @samp{nullptr}, it is the RTL information from
>>>> +the previous iteration.
>>>> +@end deftypefn
>>>> +
>>>>  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
>>>>  This target hook returns a new value for the number of times @var{loop}
>>>>  should be unrolled. The parameter @var{nunroll} is the number of times
>>>> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
>>>> index d8e3de14af1..8d4c3949fbf 100644
>>>> --- a/gcc/doc/tm.texi.in
>>>> +++ b/gcc/doc/tm.texi.in
>>>> @@ -7956,6 +7956,10 @@ lists.
>>>>
>>>>  @hook TARGET_GEN_CCMP_NEXT
>>>>
>>>> +@hook TARGET_READ_MEMSET_VALUE
>>>> +
>>>> +@hook TARGET_GEN_MEMSET_VALUE
>>>> +
>>>>  @hook TARGET_LOOP_UNROLL_ADJUST
>>>>
>>>>  @defmac POWI_MAX_MULTS
>>>> diff --git a/gcc/target.def b/gcc/target.def
>>>> index bbaf6b4f3a0..c9aca40fa88 100644
>>>> --- a/gcc/target.def
>>>> +++ b/gcc/target.def
>>>> @@ -2694,6 +2694,26 @@ DEFHOOK
>>>>   rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
>>>>   NULL)
>>>>
>>>> +DEFHOOK
>>>> +(read_memset_value,
>>>> + "This function returns the RTL of a constant integer corresponding to\n\
>>>> +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn\n\
>>>> +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
>>>
>>> where is 'str' defined?  I can't really tell what's the difference
>>
>> Fixed with
>>
>> diff --git a/gcc/target.def b/gcc/target.def
>> index c9aca40fa88..4c3a5fcc634 100644
>> --- a/gcc/target.def
>> +++ b/gcc/target.def
>> @@ -2699,8 +2699,8 @@ DEFHOOK
>>   "This function returns the RTL of a constant integer corresponding to\n\
>>  target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the string\n\
>>  constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
>> -the RTL information from the previous interation.",
>> - rtx, (const char *c, void *prev, scalar_int_mode mode),
>> +the RTL information from the previous iteration.",
>> + rtx, (const char *str, void *prev, scalar_int_mode mode),
>>   default_read_memset_value)
>>
>>  DEFHOOK
>>
>>> from read_memset_value
>>> and gen_memset_value.
>>
>> The difference is that input of read_memset_value is a string constant
>> like "123" and input of gen_memset_value is an RTL register.
>>
>>> Somehow I feel that an optab for the "splat" operation similar
>>> to vec_duplicate might be a better way to expose this - of course
>>> that doesn't handle the "prev" thing.
>>
>> The x86 backend has ix86_expand_vector_init_duplicate () to
>> broadcast QImode to TImode/OImode/XImode:
>>
>> /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
>>    with all elements equal to VAR.  Return true if successful.  */
>>
>> bool
>> ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
>>                                    rtx target, rtx val)
>>
>>> So how's this the right point of abstraction to the target?
>>
>> I can add 2 target hooks, one for scratch register and one for
>> broadcasting QImode to TImode/OImode/XImode.   Then I can
>> move x86 codes to the middle-end.
>>
> 
> Here is the patch to add 3 target hooks:
> 
>  -- Target Hook: rtx TARGET_READ_MEMSET_VALUE (const char *C,
>           scalar_int_mode MODE)
>      This function returns the RTL of a constant integer corresponding
>      to target reading 'GET_MODE_SIZE (MODE)' bytes from the string
>      constant C.
> 
>  -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx DATA, scalar_int_mode
>           MODE)
>      This function returns the RTL of a register containing
>      'GET_MODE_SIZE (MODE)' consecutive copies of the unsigned char
>      value given in the RTL register DATA.  For example, if MODE is 4
>      bytes wide, return the RTL for 0x01010101*DATA.
> 
>  -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE_FROM_PREV (void *PREV,
>           scalar_int_mode MODE)
>      This function returns the RTL of a register in MODE generated from
>      PREV in the previous iteration.
> 
> with
> 
> /* Return the RTL of a register in MODE generated from PREV in the
>    previous iteration.  */
> 
> static rtx
> gen_memset_value_from_prev (void *prevp, scalar_int_mode mode)
> {
>   by_pieces_prev *prev = (by_pieces_prev *) prevp;
>   rtx value;
>   if (prev != nullptr && prev->data != nullptr)
>     {
>       /* Use the previous data in the same mode.  */
>       if (prev->mode == mode)
>         return prev->data;
> 
>       value = targetm.gen_memset_value_from_prev (prevp, mode);
>     }
>   else
>     value = nullptr;
>   return value;
> }
> 
> /* Callback routine for store_by_pieces.  Read GET_MODE_BITSIZE (MODE)
>    bytes from constant string DATA + OFFSET and return it as target
>    constant.  If PREV isn't nullptr, it has the RTL info from the
>    previous iteration.  */
> 
> rtx
> builtin_memset_read_str (void *data, void *prev,
>                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
>                          scalar_int_mode mode)
> {
>   const char *str = (const char *) data;
> 
>   /* Don't use the previous value if size is 1.  */
>   if (GET_MODE_SIZE (mode) == 1)
>     return default_read_memset_value (str, mode);
> 
>   rtx value = gen_memset_value_from_prev (prev, mode);
>   if (value)
>     return value;
> 
>   return targetm.read_memset_value (str, mode);
> }
> 
> /* Callback routine for store_by_pieces.  Return the RTL of a register
>    containing GET_MODE_SIZE (MODE) consecutive copies of the unsigned
>    char value given in the RTL register data.  For example, if mode is
>    4 bytes wide, return the RTL for 0x01010101*data.  If PREV isn't
>    nullptr, it has the RTL info from the previous iteration.  */
> 
> static rtx
> builtin_memset_gen_str (void *datap, void *prev,
>                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
>                         scalar_int_mode mode)
> {
>   rtx data = (rtx) datap;
> 
>   /* Don't use the previous value if size is 1.  */
>   if (GET_MODE_SIZE (mode) == 1)
>     return data;
> 
>   rtx value = gen_memset_value_from_prev (prev, mode);
>   if (value)
>     return value;
> 
>   return targetm.gen_memset_value (data, mode);
> }> +/* Default implementation of TARGET_GEN_MEMSET_VALUE.  */
> +
> +rtx
> +default_gen_memset_value (rtx data, scalar_int_mode mode)
> +{
> +  rtx target, coeff;
> +  size_t size;
> +  char *p;
> +
> +  size = GET_MODE_SIZE (mode);
> +  if (size == 1)
> +    return data;
> +
> +  p = XALLOCAVEC (char, size);
> +  memset (p, 1, size);
> +  coeff = c_readstr (p, mode);
> +
> +  target = convert_to_mode (mode, data, 1);
> +  target = expand_mult (mode, target, coeff, NULL_RTX, 1);


Note this formula does not work for data = -1 for instance,
since 0x01010101U * -1 = 0xFEFEFEFFU
but memset(str, n, -1) set it to 0xFFFFFFFFU, right?

So are we sure that the value of "data" is always in the range [0..255] ?


Bernd.
H.J. Lu May 21, 2021, 11:53 a.m. UTC | #5
On Thu, May 20, 2021 at 10:42 PM Bernd Edlinger
<bernd.edlinger@hotmail.de> wrote:
>
> On 5/20/21 10:49 PM, H.J. Lu wrote:
> > On Wed, May 19, 2021 at 5:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>
> >> On Wed, May 19, 2021 at 2:25 AM Richard Biener
> >> <richard.guenther@gmail.com> wrote:
> >>>
> >>> On Tue, May 18, 2021 at 9:16 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>>
> >>>> Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
> >>>> target instructions to duplicate QImode value to TImode/OImode/XImode
> >>>> value for memmset.
> >>>>
> >>>>         PR middle-end/90773
> >>>>         * builtins.c (builtin_memset_read_str): Call
> >>>>         targetm.read_memset_value.
> >>>>         (builtin_memset_gen_str): Call targetm.gen_memset_value.
> >>>>         * target.def (read_memset_value): New hook.
> >>>>         (gen_memset_value): Likewise.
> >>>>         * targhooks.c: Inclue "builtins.h".
> >>>>         (default_read_memset_value): New function.
> >>>>         (default_gen_memset_value): Likewise.
> >>>>         * targhooks.h (default_read_memset_value): New prototype.
> >>>>         (default_gen_memset_value): Likewise.
> >>>>         * doc/tm.texi.in: Add TARGET_READ_MEMSET_VALUE and
> >>>>         TARGET_GEN_MEMSET_VALUE hooks.
> >>>>         * doc/tm.texi: Regenerated.
> >>>> ---
> >>>>  gcc/builtins.c     | 47 ++++----------------------------------
> >>>>  gcc/doc/tm.texi    | 16 +++++++++++++
> >>>>  gcc/doc/tm.texi.in |  4 ++++
> >>>>  gcc/target.def     | 20 +++++++++++++++++
> >>>>  gcc/targhooks.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++++
> >>>>  gcc/targhooks.h    |  4 ++++
> >>>>  6 files changed, 104 insertions(+), 43 deletions(-)
> >>>>
> >>>> diff --git a/gcc/builtins.c b/gcc/builtins.c
> >>>> index e1b284846b1..f78a36478ef 100644
> >>>> --- a/gcc/builtins.c
> >>>> +++ b/gcc/builtins.c
> >>>> @@ -6584,24 +6584,11 @@ expand_builtin_strncpy (tree exp, rtx target)
> >>>>     previous iteration.  */
> >>>>
> >>>>  rtx
> >>>> -builtin_memset_read_str (void *data, void *prevp,
> >>>> +builtin_memset_read_str (void *data, void *prev,
> >>>>                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> >>>>                          scalar_int_mode mode)
> >>>>  {
> >>>> -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> >>>> -  if (prev != nullptr && prev->data != nullptr)
> >>>> -    {
> >>>> -      /* Use the previous data in the same mode.  */
> >>>> -      if (prev->mode == mode)
> >>>> -       return prev->data;
> >>>> -    }
> >>>> -
> >>>> -  const char *c = (const char *) data;
> >>>> -  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
> >>>> -
> >>>> -  memset (p, *c, GET_MODE_SIZE (mode));
> >>>> -
> >>>> -  return c_readstr (p, mode);
> >>>> +  return targetm.read_memset_value ((const char *) data, prev, mode);
> >>>>  }
> >>>>
> >>>>  /* Callback routine for store_by_pieces.  Return the RTL of a register
> >>>> @@ -6611,37 +6598,11 @@ builtin_memset_read_str (void *data, void *prevp,
> >>>>     nullptr, it has the RTL info from the previous iteration.  */
> >>>>
> >>>>  static rtx
> >>>> -builtin_memset_gen_str (void *data, void *prevp,
> >>>> +builtin_memset_gen_str (void *data, void *prev,
> >>>>                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> >>>>                         scalar_int_mode mode)
> >>>>  {
> >>>> -  rtx target, coeff;
> >>>> -  size_t size;
> >>>> -  char *p;
> >>>> -
> >>>> -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> >>>> -  if (prev != nullptr && prev->data != nullptr)
> >>>> -    {
> >>>> -      /* Use the previous data in the same mode.  */
> >>>> -      if (prev->mode == mode)
> >>>> -       return prev->data;
> >>>> -
> >>>> -      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
> >>>> -      if (target != nullptr)
> >>>> -       return target;
> >>>> -    }
> >>>> -
> >>>> -  size = GET_MODE_SIZE (mode);
> >>>> -  if (size == 1)
> >>>> -    return (rtx) data;
> >>>> -
> >>>> -  p = XALLOCAVEC (char, size);
> >>>> -  memset (p, 1, size);
> >>>> -  coeff = c_readstr (p, mode);
> >>>> -
> >>>> -  target = convert_to_mode (mode, (rtx) data, 1);
> >>>> -  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> >>>> -  return force_reg (mode, target);
> >>>> +  return targetm.gen_memset_value ((rtx) data, prev, mode);
> >>>>  }
> >>>>
> >>>>  /* Expand expression EXP, which is a call to the memset builtin.  Return
> >>>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> >>>> index 85ea9395560..51385044e76 100644
> >>>> --- a/gcc/doc/tm.texi
> >>>> +++ b/gcc/doc/tm.texi
> >>>> @@ -11868,6 +11868,22 @@ This function prepares to emit a conditional comparison within a sequence
> >>>>   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
> >>>>  @end deftypefn
> >>>>
> >>>> +@deftypefn {Target Hook} rtx TARGET_READ_MEMSET_VALUE (const char *@var{c}, void *@var{prev}, scalar_int_mode @var{mode})
> >>>> +This function returns the RTL of a constant integer corresponding to
> >>>> +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn
> >>>> +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains
> >>>> +the RTL information from the previous interation.
> >>>> +@end deftypefn
> >>>> +
> >>>> +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, void *@var{prev}, scalar_int_mode @var{mode})
> >>>> +This function returns the RTL of a register containing
> >>>> +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
> >>>> +char value given in the RTL register @var{data}.  For example, if
> >>>> +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
> >>>> +If @var{PREV} is not @samp{nullptr}, it is the RTL information from
> >>>> +the previous iteration.
> >>>> +@end deftypefn
> >>>> +
> >>>>  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
> >>>>  This target hook returns a new value for the number of times @var{loop}
> >>>>  should be unrolled. The parameter @var{nunroll} is the number of times
> >>>> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> >>>> index d8e3de14af1..8d4c3949fbf 100644
> >>>> --- a/gcc/doc/tm.texi.in
> >>>> +++ b/gcc/doc/tm.texi.in
> >>>> @@ -7956,6 +7956,10 @@ lists.
> >>>>
> >>>>  @hook TARGET_GEN_CCMP_NEXT
> >>>>
> >>>> +@hook TARGET_READ_MEMSET_VALUE
> >>>> +
> >>>> +@hook TARGET_GEN_MEMSET_VALUE
> >>>> +
> >>>>  @hook TARGET_LOOP_UNROLL_ADJUST
> >>>>
> >>>>  @defmac POWI_MAX_MULTS
> >>>> diff --git a/gcc/target.def b/gcc/target.def
> >>>> index bbaf6b4f3a0..c9aca40fa88 100644
> >>>> --- a/gcc/target.def
> >>>> +++ b/gcc/target.def
> >>>> @@ -2694,6 +2694,26 @@ DEFHOOK
> >>>>   rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
> >>>>   NULL)
> >>>>
> >>>> +DEFHOOK
> >>>> +(read_memset_value,
> >>>> + "This function returns the RTL of a constant integer corresponding to\n\
> >>>> +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn\n\
> >>>> +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
> >>>
> >>> where is 'str' defined?  I can't really tell what's the difference
> >>
> >> Fixed with
> >>
> >> diff --git a/gcc/target.def b/gcc/target.def
> >> index c9aca40fa88..4c3a5fcc634 100644
> >> --- a/gcc/target.def
> >> +++ b/gcc/target.def
> >> @@ -2699,8 +2699,8 @@ DEFHOOK
> >>   "This function returns the RTL of a constant integer corresponding to\n\
> >>  target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the string\n\
> >>  constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
> >> -the RTL information from the previous interation.",
> >> - rtx, (const char *c, void *prev, scalar_int_mode mode),
> >> +the RTL information from the previous iteration.",
> >> + rtx, (const char *str, void *prev, scalar_int_mode mode),
> >>   default_read_memset_value)
> >>
> >>  DEFHOOK
> >>
> >>> from read_memset_value
> >>> and gen_memset_value.
> >>
> >> The difference is that input of read_memset_value is a string constant
> >> like "123" and input of gen_memset_value is an RTL register.
> >>
> >>> Somehow I feel that an optab for the "splat" operation similar
> >>> to vec_duplicate might be a better way to expose this - of course
> >>> that doesn't handle the "prev" thing.
> >>
> >> The x86 backend has ix86_expand_vector_init_duplicate () to
> >> broadcast QImode to TImode/OImode/XImode:
> >>
> >> /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
> >>    with all elements equal to VAR.  Return true if successful.  */
> >>
> >> bool
> >> ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
> >>                                    rtx target, rtx val)
> >>
> >>> So how's this the right point of abstraction to the target?
> >>
> >> I can add 2 target hooks, one for scratch register and one for
> >> broadcasting QImode to TImode/OImode/XImode.   Then I can
> >> move x86 codes to the middle-end.
> >>
> >
> > Here is the patch to add 3 target hooks:
> >
> >  -- Target Hook: rtx TARGET_READ_MEMSET_VALUE (const char *C,
> >           scalar_int_mode MODE)
> >      This function returns the RTL of a constant integer corresponding
> >      to target reading 'GET_MODE_SIZE (MODE)' bytes from the string
> >      constant C.
> >
> >  -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx DATA, scalar_int_mode
> >           MODE)
> >      This function returns the RTL of a register containing
> >      'GET_MODE_SIZE (MODE)' consecutive copies of the unsigned char
> >      value given in the RTL register DATA.  For example, if MODE is 4
> >      bytes wide, return the RTL for 0x01010101*DATA.
> >
> >  -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE_FROM_PREV (void *PREV,
> >           scalar_int_mode MODE)
> >      This function returns the RTL of a register in MODE generated from
> >      PREV in the previous iteration.
> >
> > with
> >
> > /* Return the RTL of a register in MODE generated from PREV in the
> >    previous iteration.  */
> >
> > static rtx
> > gen_memset_value_from_prev (void *prevp, scalar_int_mode mode)
> > {
> >   by_pieces_prev *prev = (by_pieces_prev *) prevp;
> >   rtx value;
> >   if (prev != nullptr && prev->data != nullptr)
> >     {
> >       /* Use the previous data in the same mode.  */
> >       if (prev->mode == mode)
> >         return prev->data;
> >
> >       value = targetm.gen_memset_value_from_prev (prevp, mode);
> >     }
> >   else
> >     value = nullptr;
> >   return value;
> > }
> >
> > /* Callback routine for store_by_pieces.  Read GET_MODE_BITSIZE (MODE)
> >    bytes from constant string DATA + OFFSET and return it as target
> >    constant.  If PREV isn't nullptr, it has the RTL info from the
> >    previous iteration.  */
> >
> > rtx
> > builtin_memset_read_str (void *data, void *prev,
> >                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> >                          scalar_int_mode mode)
> > {
> >   const char *str = (const char *) data;
> >
> >   /* Don't use the previous value if size is 1.  */
> >   if (GET_MODE_SIZE (mode) == 1)
> >     return default_read_memset_value (str, mode);
> >
> >   rtx value = gen_memset_value_from_prev (prev, mode);
> >   if (value)
> >     return value;
> >
> >   return targetm.read_memset_value (str, mode);
> > }
> >
> > /* Callback routine for store_by_pieces.  Return the RTL of a register
> >    containing GET_MODE_SIZE (MODE) consecutive copies of the unsigned
> >    char value given in the RTL register data.  For example, if mode is
> >    4 bytes wide, return the RTL for 0x01010101*data.  If PREV isn't
> >    nullptr, it has the RTL info from the previous iteration.  */
> >
> > static rtx
> > builtin_memset_gen_str (void *datap, void *prev,
> >                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> >                         scalar_int_mode mode)
> > {
> >   rtx data = (rtx) datap;
> >
> >   /* Don't use the previous value if size is 1.  */
> >   if (GET_MODE_SIZE (mode) == 1)
> >     return data;
> >
> >   rtx value = gen_memset_value_from_prev (prev, mode);
> >   if (value)
> >     return value;
> >
> >   return targetm.gen_memset_value (data, mode);
> > }> +/* Default implementation of TARGET_GEN_MEMSET_VALUE.  */
> > +
> > +rtx
> > +default_gen_memset_value (rtx data, scalar_int_mode mode)
> > +{
> > +  rtx target, coeff;
> > +  size_t size;
> > +  char *p;
> > +
> > +  size = GET_MODE_SIZE (mode);
> > +  if (size == 1)
> > +    return data;
> > +
> > +  p = XALLOCAVEC (char, size);
> > +  memset (p, 1, size);
> > +  coeff = c_readstr (p, mode);
> > +
> > +  target = convert_to_mode (mode, data, 1);
> > +  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
>
>
> Note this formula does not work for data = -1 for instance,
> since 0x01010101U * -1 = 0xFEFEFEFFU
> but memset(str, n, -1) set it to 0xFFFFFFFFU, right?
>
> So are we sure that the value of "data" is always in the range [0..255] ?

I didn't change the formula.  I just turned the current formula into
a target hook.  For memset,

DESCRIPTION
       The  memset()  function  fills  the  first  n  bytes of the memory area
       pointed to by s with the constant byte c.

"data", aka the constant byte c, is a byte in the range [0..255].
Richard Biener May 25, 2021, 2:34 p.m. UTC | #6
On Thu, May 20, 2021 at 10:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, May 19, 2021 at 5:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Wed, May 19, 2021 at 2:25 AM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> > >
> > > On Tue, May 18, 2021 at 9:16 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
> > > > target instructions to duplicate QImode value to TImode/OImode/XImode
> > > > value for memmset.
> > > >
> > > >         PR middle-end/90773
> > > >         * builtins.c (builtin_memset_read_str): Call
> > > >         targetm.read_memset_value.
> > > >         (builtin_memset_gen_str): Call targetm.gen_memset_value.
> > > >         * target.def (read_memset_value): New hook.
> > > >         (gen_memset_value): Likewise.
> > > >         * targhooks.c: Inclue "builtins.h".
> > > >         (default_read_memset_value): New function.
> > > >         (default_gen_memset_value): Likewise.
> > > >         * targhooks.h (default_read_memset_value): New prototype.
> > > >         (default_gen_memset_value): Likewise.
> > > >         * doc/tm.texi.in: Add TARGET_READ_MEMSET_VALUE and
> > > >         TARGET_GEN_MEMSET_VALUE hooks.
> > > >         * doc/tm.texi: Regenerated.
> > > > ---
> > > >  gcc/builtins.c     | 47 ++++----------------------------------
> > > >  gcc/doc/tm.texi    | 16 +++++++++++++
> > > >  gcc/doc/tm.texi.in |  4 ++++
> > > >  gcc/target.def     | 20 +++++++++++++++++
> > > >  gcc/targhooks.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++++
> > > >  gcc/targhooks.h    |  4 ++++
> > > >  6 files changed, 104 insertions(+), 43 deletions(-)
> > > >
> > > > diff --git a/gcc/builtins.c b/gcc/builtins.c
> > > > index e1b284846b1..f78a36478ef 100644
> > > > --- a/gcc/builtins.c
> > > > +++ b/gcc/builtins.c
> > > > @@ -6584,24 +6584,11 @@ expand_builtin_strncpy (tree exp, rtx target)
> > > >     previous iteration.  */
> > > >
> > > >  rtx
> > > > -builtin_memset_read_str (void *data, void *prevp,
> > > > +builtin_memset_read_str (void *data, void *prev,
> > > >                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > > >                          scalar_int_mode mode)
> > > >  {
> > > > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > > > -  if (prev != nullptr && prev->data != nullptr)
> > > > -    {
> > > > -      /* Use the previous data in the same mode.  */
> > > > -      if (prev->mode == mode)
> > > > -       return prev->data;
> > > > -    }
> > > > -
> > > > -  const char *c = (const char *) data;
> > > > -  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
> > > > -
> > > > -  memset (p, *c, GET_MODE_SIZE (mode));
> > > > -
> > > > -  return c_readstr (p, mode);
> > > > +  return targetm.read_memset_value ((const char *) data, prev, mode);
> > > >  }
> > > >
> > > >  /* Callback routine for store_by_pieces.  Return the RTL of a register
> > > > @@ -6611,37 +6598,11 @@ builtin_memset_read_str (void *data, void *prevp,
> > > >     nullptr, it has the RTL info from the previous iteration.  */
> > > >
> > > >  static rtx
> > > > -builtin_memset_gen_str (void *data, void *prevp,
> > > > +builtin_memset_gen_str (void *data, void *prev,
> > > >                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > > >                         scalar_int_mode mode)
> > > >  {
> > > > -  rtx target, coeff;
> > > > -  size_t size;
> > > > -  char *p;
> > > > -
> > > > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > > > -  if (prev != nullptr && prev->data != nullptr)
> > > > -    {
> > > > -      /* Use the previous data in the same mode.  */
> > > > -      if (prev->mode == mode)
> > > > -       return prev->data;
> > > > -
> > > > -      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
> > > > -      if (target != nullptr)
> > > > -       return target;
> > > > -    }
> > > > -
> > > > -  size = GET_MODE_SIZE (mode);
> > > > -  if (size == 1)
> > > > -    return (rtx) data;
> > > > -
> > > > -  p = XALLOCAVEC (char, size);
> > > > -  memset (p, 1, size);
> > > > -  coeff = c_readstr (p, mode);
> > > > -
> > > > -  target = convert_to_mode (mode, (rtx) data, 1);
> > > > -  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> > > > -  return force_reg (mode, target);
> > > > +  return targetm.gen_memset_value ((rtx) data, prev, mode);
> > > >  }
> > > >
> > > >  /* Expand expression EXP, which is a call to the memset builtin.  Return
> > > > diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> > > > index 85ea9395560..51385044e76 100644
> > > > --- a/gcc/doc/tm.texi
> > > > +++ b/gcc/doc/tm.texi
> > > > @@ -11868,6 +11868,22 @@ This function prepares to emit a conditional comparison within a sequence
> > > >   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
> > > >  @end deftypefn
> > > >
> > > > +@deftypefn {Target Hook} rtx TARGET_READ_MEMSET_VALUE (const char *@var{c}, void *@var{prev}, scalar_int_mode @var{mode})
> > > > +This function returns the RTL of a constant integer corresponding to
> > > > +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn
> > > > +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains
> > > > +the RTL information from the previous interation.
> > > > +@end deftypefn
> > > > +
> > > > +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, void *@var{prev}, scalar_int_mode @var{mode})
> > > > +This function returns the RTL of a register containing
> > > > +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
> > > > +char value given in the RTL register @var{data}.  For example, if
> > > > +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
> > > > +If @var{PREV} is not @samp{nullptr}, it is the RTL information from
> > > > +the previous iteration.
> > > > +@end deftypefn
> > > > +
> > > >  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
> > > >  This target hook returns a new value for the number of times @var{loop}
> > > >  should be unrolled. The parameter @var{nunroll} is the number of times
> > > > diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> > > > index d8e3de14af1..8d4c3949fbf 100644
> > > > --- a/gcc/doc/tm.texi.in
> > > > +++ b/gcc/doc/tm.texi.in
> > > > @@ -7956,6 +7956,10 @@ lists.
> > > >
> > > >  @hook TARGET_GEN_CCMP_NEXT
> > > >
> > > > +@hook TARGET_READ_MEMSET_VALUE
> > > > +
> > > > +@hook TARGET_GEN_MEMSET_VALUE
> > > > +
> > > >  @hook TARGET_LOOP_UNROLL_ADJUST
> > > >
> > > >  @defmac POWI_MAX_MULTS
> > > > diff --git a/gcc/target.def b/gcc/target.def
> > > > index bbaf6b4f3a0..c9aca40fa88 100644
> > > > --- a/gcc/target.def
> > > > +++ b/gcc/target.def
> > > > @@ -2694,6 +2694,26 @@ DEFHOOK
> > > >   rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
> > > >   NULL)
> > > >
> > > > +DEFHOOK
> > > > +(read_memset_value,
> > > > + "This function returns the RTL of a constant integer corresponding to\n\
> > > > +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn\n\
> > > > +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
> > >
> > > where is 'str' defined?  I can't really tell what's the difference
> >
> > Fixed with
> >
> > diff --git a/gcc/target.def b/gcc/target.def
> > index c9aca40fa88..4c3a5fcc634 100644
> > --- a/gcc/target.def
> > +++ b/gcc/target.def
> > @@ -2699,8 +2699,8 @@ DEFHOOK
> >   "This function returns the RTL of a constant integer corresponding to\n\
> >  target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the string\n\
> >  constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
> > -the RTL information from the previous interation.",
> > - rtx, (const char *c, void *prev, scalar_int_mode mode),
> > +the RTL information from the previous iteration.",
> > + rtx, (const char *str, void *prev, scalar_int_mode mode),
> >   default_read_memset_value)
> >
> >  DEFHOOK
> >
> > > from read_memset_value
> > > and gen_memset_value.
> >
> > The difference is that input of read_memset_value is a string constant
> > like "123" and input of gen_memset_value is an RTL register.
> >
> > > Somehow I feel that an optab for the "splat" operation similar
> > > to vec_duplicate might be a better way to expose this - of course
> > > that doesn't handle the "prev" thing.
> >
> > The x86 backend has ix86_expand_vector_init_duplicate () to
> > broadcast QImode to TImode/OImode/XImode:
> >
> > /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
> >    with all elements equal to VAR.  Return true if successful.  */
> >
> > bool
> > ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
> >                                    rtx target, rtx val)
> >
> > > So how's this the right point of abstraction to the target?
> >
> > I can add 2 target hooks, one for scratch register and one for
> > broadcasting QImode to TImode/OImode/XImode.   Then I can
> > move x86 codes to the middle-end.
> >
>
> Here is the patch to add 3 target hooks:
>
>  -- Target Hook: rtx TARGET_READ_MEMSET_VALUE (const char *C,
>           scalar_int_mode MODE)
>      This function returns the RTL of a constant integer corresponding
>      to target reading 'GET_MODE_SIZE (MODE)' bytes from the string
>      constant C.
>
>  -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx DATA, scalar_int_mode
>           MODE)
>      This function returns the RTL of a register containing
>      'GET_MODE_SIZE (MODE)' consecutive copies of the unsigned char
>      value given in the RTL register DATA.  For example, if MODE is 4
>      bytes wide, return the RTL for 0x01010101*DATA.

For this one I wonder if it should be an optab instead.  Couldn't you
use the existing vec_duplicate for this by using (paradoxical) subregs
like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI (reg:QI ...)))?

Currently vec_duplicate is only used for SVE IIRC.

>  -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE_FROM_PREV (void *PREV,
>           scalar_int_mode MODE)
>      This function returns the RTL of a register in MODE generated from
>      PREV in the previous iteration.
>
> with
>
> /* Return the RTL of a register in MODE generated from PREV in the
>    previous iteration.  */
>
> static rtx
> gen_memset_value_from_prev (void *prevp, scalar_int_mode mode)
> {
>   by_pieces_prev *prev = (by_pieces_prev *) prevp;
>   rtx value;
>   if (prev != nullptr && prev->data != nullptr)
>     {
>       /* Use the previous data in the same mode.  */
>       if (prev->mode == mode)
>         return prev->data;
>
>       value = targetm.gen_memset_value_from_prev (prevp, mode);

But why do we need a target hook here?  Doesn't this just duplicate/subreg
this further?

>     }
>   else
>     value = nullptr;
>   return value;
> }
>
> /* Callback routine for store_by_pieces.  Read GET_MODE_BITSIZE (MODE)
>    bytes from constant string DATA + OFFSET and return it as target
>    constant.  If PREV isn't nullptr, it has the RTL info from the
>    previous iteration.  */
>
> rtx
> builtin_memset_read_str (void *data, void *prev,
>                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
>                          scalar_int_mode mode)
> {
>   const char *str = (const char *) data;
>
>   /* Don't use the previous value if size is 1.  */
>   if (GET_MODE_SIZE (mode) == 1)
>     return default_read_memset_value (str, mode);
>
>   rtx value = gen_memset_value_from_prev (prev, mode);
>   if (value)
>     return value;
>
>   return targetm.read_memset_value (str, mode);
> }
>
> /* Callback routine for store_by_pieces.  Return the RTL of a register
>    containing GET_MODE_SIZE (MODE) consecutive copies of the unsigned
>    char value given in the RTL register data.  For example, if mode is
>    4 bytes wide, return the RTL for 0x01010101*data.  If PREV isn't
>    nullptr, it has the RTL info from the previous iteration.  */
>
> static rtx
> builtin_memset_gen_str (void *datap, void *prev,
>                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
>                         scalar_int_mode mode)
> {
>   rtx data = (rtx) datap;
>
>   /* Don't use the previous value if size is 1.  */
>   if (GET_MODE_SIZE (mode) == 1)
>     return data;
>
>   rtx value = gen_memset_value_from_prev (prev, mode);
>   if (value)
>     return value;
>
>   return targetm.gen_memset_value (data, mode);
> }
>
>
> --
> H.J.
H.J. Lu May 25, 2021, 3:11 p.m. UTC | #7
On Tue, May 25, 2021 at 7:34 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Thu, May 20, 2021 at 10:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Wed, May 19, 2021 at 5:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Wed, May 19, 2021 at 2:25 AM Richard Biener
> > > <richard.guenther@gmail.com> wrote:
> > > >
> > > > On Tue, May 18, 2021 at 9:16 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
> > > > > target instructions to duplicate QImode value to TImode/OImode/XImode
> > > > > value for memmset.
> > > > >
> > > > >         PR middle-end/90773
> > > > >         * builtins.c (builtin_memset_read_str): Call
> > > > >         targetm.read_memset_value.
> > > > >         (builtin_memset_gen_str): Call targetm.gen_memset_value.
> > > > >         * target.def (read_memset_value): New hook.
> > > > >         (gen_memset_value): Likewise.
> > > > >         * targhooks.c: Inclue "builtins.h".
> > > > >         (default_read_memset_value): New function.
> > > > >         (default_gen_memset_value): Likewise.
> > > > >         * targhooks.h (default_read_memset_value): New prototype.
> > > > >         (default_gen_memset_value): Likewise.
> > > > >         * doc/tm.texi.in: Add TARGET_READ_MEMSET_VALUE and
> > > > >         TARGET_GEN_MEMSET_VALUE hooks.
> > > > >         * doc/tm.texi: Regenerated.
> > > > > ---
> > > > >  gcc/builtins.c     | 47 ++++----------------------------------
> > > > >  gcc/doc/tm.texi    | 16 +++++++++++++
> > > > >  gcc/doc/tm.texi.in |  4 ++++
> > > > >  gcc/target.def     | 20 +++++++++++++++++
> > > > >  gcc/targhooks.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > >  gcc/targhooks.h    |  4 ++++
> > > > >  6 files changed, 104 insertions(+), 43 deletions(-)
> > > > >
> > > > > diff --git a/gcc/builtins.c b/gcc/builtins.c
> > > > > index e1b284846b1..f78a36478ef 100644
> > > > > --- a/gcc/builtins.c
> > > > > +++ b/gcc/builtins.c
> > > > > @@ -6584,24 +6584,11 @@ expand_builtin_strncpy (tree exp, rtx target)
> > > > >     previous iteration.  */
> > > > >
> > > > >  rtx
> > > > > -builtin_memset_read_str (void *data, void *prevp,
> > > > > +builtin_memset_read_str (void *data, void *prev,
> > > > >                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > > > >                          scalar_int_mode mode)
> > > > >  {
> > > > > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > > > > -  if (prev != nullptr && prev->data != nullptr)
> > > > > -    {
> > > > > -      /* Use the previous data in the same mode.  */
> > > > > -      if (prev->mode == mode)
> > > > > -       return prev->data;
> > > > > -    }
> > > > > -
> > > > > -  const char *c = (const char *) data;
> > > > > -  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
> > > > > -
> > > > > -  memset (p, *c, GET_MODE_SIZE (mode));
> > > > > -
> > > > > -  return c_readstr (p, mode);
> > > > > +  return targetm.read_memset_value ((const char *) data, prev, mode);
> > > > >  }
> > > > >
> > > > >  /* Callback routine for store_by_pieces.  Return the RTL of a register
> > > > > @@ -6611,37 +6598,11 @@ builtin_memset_read_str (void *data, void *prevp,
> > > > >     nullptr, it has the RTL info from the previous iteration.  */
> > > > >
> > > > >  static rtx
> > > > > -builtin_memset_gen_str (void *data, void *prevp,
> > > > > +builtin_memset_gen_str (void *data, void *prev,
> > > > >                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > > > >                         scalar_int_mode mode)
> > > > >  {
> > > > > -  rtx target, coeff;
> > > > > -  size_t size;
> > > > > -  char *p;
> > > > > -
> > > > > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > > > > -  if (prev != nullptr && prev->data != nullptr)
> > > > > -    {
> > > > > -      /* Use the previous data in the same mode.  */
> > > > > -      if (prev->mode == mode)
> > > > > -       return prev->data;
> > > > > -
> > > > > -      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
> > > > > -      if (target != nullptr)
> > > > > -       return target;
> > > > > -    }
> > > > > -
> > > > > -  size = GET_MODE_SIZE (mode);
> > > > > -  if (size == 1)
> > > > > -    return (rtx) data;
> > > > > -
> > > > > -  p = XALLOCAVEC (char, size);
> > > > > -  memset (p, 1, size);
> > > > > -  coeff = c_readstr (p, mode);
> > > > > -
> > > > > -  target = convert_to_mode (mode, (rtx) data, 1);
> > > > > -  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> > > > > -  return force_reg (mode, target);
> > > > > +  return targetm.gen_memset_value ((rtx) data, prev, mode);
> > > > >  }
> > > > >
> > > > >  /* Expand expression EXP, which is a call to the memset builtin.  Return
> > > > > diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> > > > > index 85ea9395560..51385044e76 100644
> > > > > --- a/gcc/doc/tm.texi
> > > > > +++ b/gcc/doc/tm.texi
> > > > > @@ -11868,6 +11868,22 @@ This function prepares to emit a conditional comparison within a sequence
> > > > >   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
> > > > >  @end deftypefn
> > > > >
> > > > > +@deftypefn {Target Hook} rtx TARGET_READ_MEMSET_VALUE (const char *@var{c}, void *@var{prev}, scalar_int_mode @var{mode})
> > > > > +This function returns the RTL of a constant integer corresponding to
> > > > > +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn
> > > > > +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains
> > > > > +the RTL information from the previous interation.
> > > > > +@end deftypefn
> > > > > +
> > > > > +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, void *@var{prev}, scalar_int_mode @var{mode})
> > > > > +This function returns the RTL of a register containing
> > > > > +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
> > > > > +char value given in the RTL register @var{data}.  For example, if
> > > > > +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
> > > > > +If @var{PREV} is not @samp{nullptr}, it is the RTL information from
> > > > > +the previous iteration.
> > > > > +@end deftypefn
> > > > > +
> > > > >  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
> > > > >  This target hook returns a new value for the number of times @var{loop}
> > > > >  should be unrolled. The parameter @var{nunroll} is the number of times
> > > > > diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> > > > > index d8e3de14af1..8d4c3949fbf 100644
> > > > > --- a/gcc/doc/tm.texi.in
> > > > > +++ b/gcc/doc/tm.texi.in
> > > > > @@ -7956,6 +7956,10 @@ lists.
> > > > >
> > > > >  @hook TARGET_GEN_CCMP_NEXT
> > > > >
> > > > > +@hook TARGET_READ_MEMSET_VALUE
> > > > > +
> > > > > +@hook TARGET_GEN_MEMSET_VALUE
> > > > > +
> > > > >  @hook TARGET_LOOP_UNROLL_ADJUST
> > > > >
> > > > >  @defmac POWI_MAX_MULTS
> > > > > diff --git a/gcc/target.def b/gcc/target.def
> > > > > index bbaf6b4f3a0..c9aca40fa88 100644
> > > > > --- a/gcc/target.def
> > > > > +++ b/gcc/target.def
> > > > > @@ -2694,6 +2694,26 @@ DEFHOOK
> > > > >   rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
> > > > >   NULL)
> > > > >
> > > > > +DEFHOOK
> > > > > +(read_memset_value,
> > > > > + "This function returns the RTL of a constant integer corresponding to\n\
> > > > > +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn\n\
> > > > > +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
> > > >
> > > > where is 'str' defined?  I can't really tell what's the difference
> > >
> > > Fixed with
> > >
> > > diff --git a/gcc/target.def b/gcc/target.def
> > > index c9aca40fa88..4c3a5fcc634 100644
> > > --- a/gcc/target.def
> > > +++ b/gcc/target.def
> > > @@ -2699,8 +2699,8 @@ DEFHOOK
> > >   "This function returns the RTL of a constant integer corresponding to\n\
> > >  target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the string\n\
> > >  constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
> > > -the RTL information from the previous interation.",
> > > - rtx, (const char *c, void *prev, scalar_int_mode mode),
> > > +the RTL information from the previous iteration.",
> > > + rtx, (const char *str, void *prev, scalar_int_mode mode),
> > >   default_read_memset_value)
> > >
> > >  DEFHOOK
> > >
> > > > from read_memset_value
> > > > and gen_memset_value.
> > >
> > > The difference is that input of read_memset_value is a string constant
> > > like "123" and input of gen_memset_value is an RTL register.
> > >
> > > > Somehow I feel that an optab for the "splat" operation similar
> > > > to vec_duplicate might be a better way to expose this - of course
> > > > that doesn't handle the "prev" thing.
> > >
> > > The x86 backend has ix86_expand_vector_init_duplicate () to
> > > broadcast QImode to TImode/OImode/XImode:
> > >
> > > /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
> > >    with all elements equal to VAR.  Return true if successful.  */
> > >
> > > bool
> > > ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
> > >                                    rtx target, rtx val)
> > >
> > > > So how's this the right point of abstraction to the target?
> > >
> > > I can add 2 target hooks, one for scratch register and one for
> > > broadcasting QImode to TImode/OImode/XImode.   Then I can
> > > move x86 codes to the middle-end.
> > >
> >
> > Here is the patch to add 3 target hooks:
> >
> >  -- Target Hook: rtx TARGET_READ_MEMSET_VALUE (const char *C,
> >           scalar_int_mode MODE)
> >      This function returns the RTL of a constant integer corresponding
> >      to target reading 'GET_MODE_SIZE (MODE)' bytes from the string
> >      constant C.
> >
> >  -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx DATA, scalar_int_mode
> >           MODE)
> >      This function returns the RTL of a register containing
> >      'GET_MODE_SIZE (MODE)' consecutive copies of the unsigned char
> >      value given in the RTL register DATA.  For example, if MODE is 4
> >      bytes wide, return the RTL for 0x01010101*DATA.
>
> For this one I wonder if it should be an optab instead.  Couldn't you
> use the existing vec_duplicate for this by using (paradoxical) subregs
> like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI (reg:QI ...)))?

I tried.   It doesn't even work on x86.  See:

https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570661.html

There are special cases to subreg HI, SI and DI modes of TI mode in
ix86_gen_memset_value_from_prev.   simplify_gen_subreg doesn't
work here.   Each backend may need its own special handling.

> Currently vec_duplicate is only used for SVE IIRC.
>
> >  -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE_FROM_PREV (void *PREV,
> >           scalar_int_mode MODE)
> >      This function returns the RTL of a register in MODE generated from
> >      PREV in the previous iteration.
> >
> > with
> >
> > /* Return the RTL of a register in MODE generated from PREV in the
> >    previous iteration.  */
> >
> > static rtx
> > gen_memset_value_from_prev (void *prevp, scalar_int_mode mode)
> > {
> >   by_pieces_prev *prev = (by_pieces_prev *) prevp;
> >   rtx value;
> >   if (prev != nullptr && prev->data != nullptr)
> >     {
> >       /* Use the previous data in the same mode.  */
> >       if (prev->mode == mode)
> >         return prev->data;
> >
> >       value = targetm.gen_memset_value_from_prev (prevp, mode);
>
> But why do we need a target hook here?  Doesn't this just duplicate/subreg
> this further?

See above.

> >     }
> >   else
> >     value = nullptr;
> >   return value;
> > }
> >
> > /* Callback routine for store_by_pieces.  Read GET_MODE_BITSIZE (MODE)
> >    bytes from constant string DATA + OFFSET and return it as target
> >    constant.  If PREV isn't nullptr, it has the RTL info from the
> >    previous iteration.  */
> >
> > rtx
> > builtin_memset_read_str (void *data, void *prev,
> >                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> >                          scalar_int_mode mode)
> > {
> >   const char *str = (const char *) data;
> >
> >   /* Don't use the previous value if size is 1.  */
> >   if (GET_MODE_SIZE (mode) == 1)
> >     return default_read_memset_value (str, mode);
> >
> >   rtx value = gen_memset_value_from_prev (prev, mode);
> >   if (value)
> >     return value;
> >
> >   return targetm.read_memset_value (str, mode);
> > }
> >
> > /* Callback routine for store_by_pieces.  Return the RTL of a register
> >    containing GET_MODE_SIZE (MODE) consecutive copies of the unsigned
> >    char value given in the RTL register data.  For example, if mode is
> >    4 bytes wide, return the RTL for 0x01010101*data.  If PREV isn't
> >    nullptr, it has the RTL info from the previous iteration.  */
> >
> > static rtx
> > builtin_memset_gen_str (void *datap, void *prev,
> >                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> >                         scalar_int_mode mode)
> > {
> >   rtx data = (rtx) datap;
> >
> >   /* Don't use the previous value if size is 1.  */
> >   if (GET_MODE_SIZE (mode) == 1)
> >     return data;
> >
> >   rtx value = gen_memset_value_from_prev (prev, mode);
> >   if (value)
> >     return value;
> >
> >   return targetm.gen_memset_value (data, mode);
> > }
> >
> >
> > --
> > H.J.
Richard Biener May 26, 2021, 8:28 a.m. UTC | #8
On Tue, May 25, 2021 at 5:12 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, May 25, 2021 at 7:34 AM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Thu, May 20, 2021 at 10:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Wed, May 19, 2021 at 5:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Wed, May 19, 2021 at 2:25 AM Richard Biener
> > > > <richard.guenther@gmail.com> wrote:
> > > > >
> > > > > On Tue, May 18, 2021 at 9:16 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > >
> > > > > > Add TARGET_READ_MEMSET_VALUE and TARGET_GEN_MEMSET_VALUE to support
> > > > > > target instructions to duplicate QImode value to TImode/OImode/XImode
> > > > > > value for memmset.
> > > > > >
> > > > > >         PR middle-end/90773
> > > > > >         * builtins.c (builtin_memset_read_str): Call
> > > > > >         targetm.read_memset_value.
> > > > > >         (builtin_memset_gen_str): Call targetm.gen_memset_value.
> > > > > >         * target.def (read_memset_value): New hook.
> > > > > >         (gen_memset_value): Likewise.
> > > > > >         * targhooks.c: Inclue "builtins.h".
> > > > > >         (default_read_memset_value): New function.
> > > > > >         (default_gen_memset_value): Likewise.
> > > > > >         * targhooks.h (default_read_memset_value): New prototype.
> > > > > >         (default_gen_memset_value): Likewise.
> > > > > >         * doc/tm.texi.in: Add TARGET_READ_MEMSET_VALUE and
> > > > > >         TARGET_GEN_MEMSET_VALUE hooks.
> > > > > >         * doc/tm.texi: Regenerated.
> > > > > > ---
> > > > > >  gcc/builtins.c     | 47 ++++----------------------------------
> > > > > >  gcc/doc/tm.texi    | 16 +++++++++++++
> > > > > >  gcc/doc/tm.texi.in |  4 ++++
> > > > > >  gcc/target.def     | 20 +++++++++++++++++
> > > > > >  gcc/targhooks.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > > >  gcc/targhooks.h    |  4 ++++
> > > > > >  6 files changed, 104 insertions(+), 43 deletions(-)
> > > > > >
> > > > > > diff --git a/gcc/builtins.c b/gcc/builtins.c
> > > > > > index e1b284846b1..f78a36478ef 100644
> > > > > > --- a/gcc/builtins.c
> > > > > > +++ b/gcc/builtins.c
> > > > > > @@ -6584,24 +6584,11 @@ expand_builtin_strncpy (tree exp, rtx target)
> > > > > >     previous iteration.  */
> > > > > >
> > > > > >  rtx
> > > > > > -builtin_memset_read_str (void *data, void *prevp,
> > > > > > +builtin_memset_read_str (void *data, void *prev,
> > > > > >                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > > > > >                          scalar_int_mode mode)
> > > > > >  {
> > > > > > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > > > > > -  if (prev != nullptr && prev->data != nullptr)
> > > > > > -    {
> > > > > > -      /* Use the previous data in the same mode.  */
> > > > > > -      if (prev->mode == mode)
> > > > > > -       return prev->data;
> > > > > > -    }
> > > > > > -
> > > > > > -  const char *c = (const char *) data;
> > > > > > -  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
> > > > > > -
> > > > > > -  memset (p, *c, GET_MODE_SIZE (mode));
> > > > > > -
> > > > > > -  return c_readstr (p, mode);
> > > > > > +  return targetm.read_memset_value ((const char *) data, prev, mode);
> > > > > >  }
> > > > > >
> > > > > >  /* Callback routine for store_by_pieces.  Return the RTL of a register
> > > > > > @@ -6611,37 +6598,11 @@ builtin_memset_read_str (void *data, void *prevp,
> > > > > >     nullptr, it has the RTL info from the previous iteration.  */
> > > > > >
> > > > > >  static rtx
> > > > > > -builtin_memset_gen_str (void *data, void *prevp,
> > > > > > +builtin_memset_gen_str (void *data, void *prev,
> > > > > >                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > > > > >                         scalar_int_mode mode)
> > > > > >  {
> > > > > > -  rtx target, coeff;
> > > > > > -  size_t size;
> > > > > > -  char *p;
> > > > > > -
> > > > > > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > > > > > -  if (prev != nullptr && prev->data != nullptr)
> > > > > > -    {
> > > > > > -      /* Use the previous data in the same mode.  */
> > > > > > -      if (prev->mode == mode)
> > > > > > -       return prev->data;
> > > > > > -
> > > > > > -      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
> > > > > > -      if (target != nullptr)
> > > > > > -       return target;
> > > > > > -    }
> > > > > > -
> > > > > > -  size = GET_MODE_SIZE (mode);
> > > > > > -  if (size == 1)
> > > > > > -    return (rtx) data;
> > > > > > -
> > > > > > -  p = XALLOCAVEC (char, size);
> > > > > > -  memset (p, 1, size);
> > > > > > -  coeff = c_readstr (p, mode);
> > > > > > -
> > > > > > -  target = convert_to_mode (mode, (rtx) data, 1);
> > > > > > -  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> > > > > > -  return force_reg (mode, target);
> > > > > > +  return targetm.gen_memset_value ((rtx) data, prev, mode);
> > > > > >  }
> > > > > >
> > > > > >  /* Expand expression EXP, which is a call to the memset builtin.  Return
> > > > > > diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> > > > > > index 85ea9395560..51385044e76 100644
> > > > > > --- a/gcc/doc/tm.texi
> > > > > > +++ b/gcc/doc/tm.texi
> > > > > > @@ -11868,6 +11868,22 @@ This function prepares to emit a conditional comparison within a sequence
> > > > > >   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
> > > > > >  @end deftypefn
> > > > > >
> > > > > > +@deftypefn {Target Hook} rtx TARGET_READ_MEMSET_VALUE (const char *@var{c}, void *@var{prev}, scalar_int_mode @var{mode})
> > > > > > +This function returns the RTL of a constant integer corresponding to
> > > > > > +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn
> > > > > > +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains
> > > > > > +the RTL information from the previous interation.
> > > > > > +@end deftypefn
> > > > > > +
> > > > > > +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, void *@var{prev}, scalar_int_mode @var{mode})
> > > > > > +This function returns the RTL of a register containing
> > > > > > +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
> > > > > > +char value given in the RTL register @var{data}.  For example, if
> > > > > > +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
> > > > > > +If @var{PREV} is not @samp{nullptr}, it is the RTL information from
> > > > > > +the previous iteration.
> > > > > > +@end deftypefn
> > > > > > +
> > > > > >  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
> > > > > >  This target hook returns a new value for the number of times @var{loop}
> > > > > >  should be unrolled. The parameter @var{nunroll} is the number of times
> > > > > > diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> > > > > > index d8e3de14af1..8d4c3949fbf 100644
> > > > > > --- a/gcc/doc/tm.texi.in
> > > > > > +++ b/gcc/doc/tm.texi.in
> > > > > > @@ -7956,6 +7956,10 @@ lists.
> > > > > >
> > > > > >  @hook TARGET_GEN_CCMP_NEXT
> > > > > >
> > > > > > +@hook TARGET_READ_MEMSET_VALUE
> > > > > > +
> > > > > > +@hook TARGET_GEN_MEMSET_VALUE
> > > > > > +
> > > > > >  @hook TARGET_LOOP_UNROLL_ADJUST
> > > > > >
> > > > > >  @defmac POWI_MAX_MULTS
> > > > > > diff --git a/gcc/target.def b/gcc/target.def
> > > > > > index bbaf6b4f3a0..c9aca40fa88 100644
> > > > > > --- a/gcc/target.def
> > > > > > +++ b/gcc/target.def
> > > > > > @@ -2694,6 +2694,26 @@ DEFHOOK
> > > > > >   rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
> > > > > >   NULL)
> > > > > >
> > > > > > +DEFHOOK
> > > > > > +(read_memset_value,
> > > > > > + "This function returns the RTL of a constant integer corresponding to\n\
> > > > > > +target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn\n\
> > > > > > +constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
> > > > >
> > > > > where is 'str' defined?  I can't really tell what's the difference
> > > >
> > > > Fixed with
> > > >
> > > > diff --git a/gcc/target.def b/gcc/target.def
> > > > index c9aca40fa88..4c3a5fcc634 100644
> > > > --- a/gcc/target.def
> > > > +++ b/gcc/target.def
> > > > @@ -2699,8 +2699,8 @@ DEFHOOK
> > > >   "This function returns the RTL of a constant integer corresponding to\n\
> > > >  target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the string\n\
> > > >  constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
> > > > -the RTL information from the previous interation.",
> > > > - rtx, (const char *c, void *prev, scalar_int_mode mode),
> > > > +the RTL information from the previous iteration.",
> > > > + rtx, (const char *str, void *prev, scalar_int_mode mode),
> > > >   default_read_memset_value)
> > > >
> > > >  DEFHOOK
> > > >
> > > > > from read_memset_value
> > > > > and gen_memset_value.
> > > >
> > > > The difference is that input of read_memset_value is a string constant
> > > > like "123" and input of gen_memset_value is an RTL register.
> > > >
> > > > > Somehow I feel that an optab for the "splat" operation similar
> > > > > to vec_duplicate might be a better way to expose this - of course
> > > > > that doesn't handle the "prev" thing.
> > > >
> > > > The x86 backend has ix86_expand_vector_init_duplicate () to
> > > > broadcast QImode to TImode/OImode/XImode:
> > > >
> > > > /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
> > > >    with all elements equal to VAR.  Return true if successful.  */
> > > >
> > > > bool
> > > > ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
> > > >                                    rtx target, rtx val)
> > > >
> > > > > So how's this the right point of abstraction to the target?
> > > >
> > > > I can add 2 target hooks, one for scratch register and one for
> > > > broadcasting QImode to TImode/OImode/XImode.   Then I can
> > > > move x86 codes to the middle-end.
> > > >
> > >
> > > Here is the patch to add 3 target hooks:
> > >
> > >  -- Target Hook: rtx TARGET_READ_MEMSET_VALUE (const char *C,
> > >           scalar_int_mode MODE)
> > >      This function returns the RTL of a constant integer corresponding
> > >      to target reading 'GET_MODE_SIZE (MODE)' bytes from the string
> > >      constant C.
> > >
> > >  -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx DATA, scalar_int_mode
> > >           MODE)
> > >      This function returns the RTL of a register containing
> > >      'GET_MODE_SIZE (MODE)' consecutive copies of the unsigned char
> > >      value given in the RTL register DATA.  For example, if MODE is 4
> > >      bytes wide, return the RTL for 0x01010101*DATA.
> >
> > For this one I wonder if it should be an optab instead.  Couldn't you
> > use the existing vec_duplicate for this by using (paradoxical) subregs
> > like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI (reg:QI ...)))?
>
> I tried.   It doesn't even work on x86.  See:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570661.html

Not sure what I should read from there...

> There are special cases to subreg HI, SI and DI modes of TI mode in
> ix86_gen_memset_value_from_prev.   simplify_gen_subreg doesn't
> work here.   Each backend may need its own special handling.

OK, I guess I'm not (RTL) qualified enough to further review these parts,
sorry.  Since we're doing code generation the canonical way to communicate
with backends should be optabs, not some set of disconnected target hooks.
But as said, I probably don't know enough of RTL to see why it's the only way.

Richard.

> > Currently vec_duplicate is only used for SVE IIRC.
> >
> > >  -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE_FROM_PREV (void *PREV,
> > >           scalar_int_mode MODE)
> > >      This function returns the RTL of a register in MODE generated from
> > >      PREV in the previous iteration.
> > >
> > > with
> > >
> > > /* Return the RTL of a register in MODE generated from PREV in the
> > >    previous iteration.  */
> > >
> > > static rtx
> > > gen_memset_value_from_prev (void *prevp, scalar_int_mode mode)
> > > {
> > >   by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > >   rtx value;
> > >   if (prev != nullptr && prev->data != nullptr)
> > >     {
> > >       /* Use the previous data in the same mode.  */
> > >       if (prev->mode == mode)
> > >         return prev->data;
> > >
> > >       value = targetm.gen_memset_value_from_prev (prevp, mode);
> >
> > But why do we need a target hook here?  Doesn't this just duplicate/subreg
> > this further?
>
> See above.
>
> > >     }
> > >   else
> > >     value = nullptr;
> > >   return value;
> > > }
> > >
> > > /* Callback routine for store_by_pieces.  Read GET_MODE_BITSIZE (MODE)
> > >    bytes from constant string DATA + OFFSET and return it as target
> > >    constant.  If PREV isn't nullptr, it has the RTL info from the
> > >    previous iteration.  */
> > >
> > > rtx
> > > builtin_memset_read_str (void *data, void *prev,
> > >                          HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > >                          scalar_int_mode mode)
> > > {
> > >   const char *str = (const char *) data;
> > >
> > >   /* Don't use the previous value if size is 1.  */
> > >   if (GET_MODE_SIZE (mode) == 1)
> > >     return default_read_memset_value (str, mode);
> > >
> > >   rtx value = gen_memset_value_from_prev (prev, mode);
> > >   if (value)
> > >     return value;
> > >
> > >   return targetm.read_memset_value (str, mode);
> > > }
> > >
> > > /* Callback routine for store_by_pieces.  Return the RTL of a register
> > >    containing GET_MODE_SIZE (MODE) consecutive copies of the unsigned
> > >    char value given in the RTL register data.  For example, if mode is
> > >    4 bytes wide, return the RTL for 0x01010101*data.  If PREV isn't
> > >    nullptr, it has the RTL info from the previous iteration.  */
> > >
> > > static rtx
> > > builtin_memset_gen_str (void *datap, void *prev,
> > >                         HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > >                         scalar_int_mode mode)
> > > {
> > >   rtx data = (rtx) datap;
> > >
> > >   /* Don't use the previous value if size is 1.  */
> > >   if (GET_MODE_SIZE (mode) == 1)
> > >     return data;
> > >
> > >   rtx value = gen_memset_value_from_prev (prev, mode);
> > >   if (value)
> > >     return value;
> > >
> > >   return targetm.gen_memset_value (data, mode);
> > > }
> > >
> > >
> > > --
> > > H.J.
>
>
>
> --
> H.J.
diff mbox series

Patch

diff --git a/gcc/builtins.c b/gcc/builtins.c
index e1b284846b1..f78a36478ef 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6584,24 +6584,11 @@  expand_builtin_strncpy (tree exp, rtx target)
    previous iteration.  */
 
 rtx
-builtin_memset_read_str (void *data, void *prevp,
+builtin_memset_read_str (void *data, void *prev,
 			 HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
 			 scalar_int_mode mode)
 {
-  by_pieces_prev *prev = (by_pieces_prev *) prevp;
-  if (prev != nullptr && prev->data != nullptr)
-    {
-      /* Use the previous data in the same mode.  */
-      if (prev->mode == mode)
-	return prev->data;
-    }
-
-  const char *c = (const char *) data;
-  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
-
-  memset (p, *c, GET_MODE_SIZE (mode));
-
-  return c_readstr (p, mode);
+  return targetm.read_memset_value ((const char *) data, prev, mode);
 }
 
 /* Callback routine for store_by_pieces.  Return the RTL of a register
@@ -6611,37 +6598,11 @@  builtin_memset_read_str (void *data, void *prevp,
    nullptr, it has the RTL info from the previous iteration.  */
 
 static rtx
-builtin_memset_gen_str (void *data, void *prevp,
+builtin_memset_gen_str (void *data, void *prev,
 			HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
 			scalar_int_mode mode)
 {
-  rtx target, coeff;
-  size_t size;
-  char *p;
-
-  by_pieces_prev *prev = (by_pieces_prev *) prevp;
-  if (prev != nullptr && prev->data != nullptr)
-    {
-      /* Use the previous data in the same mode.  */
-      if (prev->mode == mode)
-	return prev->data;
-
-      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
-      if (target != nullptr)
-	return target;
-    }
-
-  size = GET_MODE_SIZE (mode);
-  if (size == 1)
-    return (rtx) data;
-
-  p = XALLOCAVEC (char, size);
-  memset (p, 1, size);
-  coeff = c_readstr (p, mode);
-
-  target = convert_to_mode (mode, (rtx) data, 1);
-  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
-  return force_reg (mode, target);
+  return targetm.gen_memset_value ((rtx) data, prev, mode);
 }
 
 /* Expand expression EXP, which is a call to the memset builtin.  Return
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 85ea9395560..51385044e76 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11868,6 +11868,22 @@  This function prepares to emit a conditional comparison within a sequence
  @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
 @end deftypefn
 
+@deftypefn {Target Hook} rtx TARGET_READ_MEMSET_VALUE (const char *@var{c}, void *@var{prev}, scalar_int_mode @var{mode})
+This function returns the RTL of a constant integer corresponding to
+target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn
+constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains
+the RTL information from the previous interation.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, void *@var{prev}, scalar_int_mode @var{mode})
+This function returns the RTL of a register containing
+@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
+char value given in the RTL register @var{data}.  For example, if
+@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
+If @var{PREV} is not @samp{nullptr}, it is the RTL information from
+the previous iteration.
+@end deftypefn
+
 @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, class loop *@var{loop})
 This target hook returns a new value for the number of times @var{loop}
 should be unrolled. The parameter @var{nunroll} is the number of times
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index d8e3de14af1..8d4c3949fbf 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7956,6 +7956,10 @@  lists.
 
 @hook TARGET_GEN_CCMP_NEXT
 
+@hook TARGET_READ_MEMSET_VALUE
+
+@hook TARGET_GEN_MEMSET_VALUE
+
 @hook TARGET_LOOP_UNROLL_ADJUST
 
 @defmac POWI_MAX_MULTS
diff --git a/gcc/target.def b/gcc/target.def
index bbaf6b4f3a0..c9aca40fa88 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2694,6 +2694,26 @@  DEFHOOK
  rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
  NULL)
 
+DEFHOOK
+(read_memset_value,
+ "This function returns the RTL of a constant integer corresponding to\n\
+target reading @code{GET_MODE_SIZE (@var{mode})} bytes from the stringn\n\
+constant @var{str}.  If @var{prev} is not @samp{nullptr}, it contains\n\
+the RTL information from the previous interation.",
+ rtx, (const char *c, void *prev, scalar_int_mode mode),
+ default_read_memset_value)
+
+DEFHOOK
+(gen_memset_value,
+ "This function returns the RTL of a register containing\n\
+@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned\n\
+char value given in the RTL register @var{data}.  For example, if\n\
+@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.\n\
+If @var{PREV} is not @samp{nullptr}, it is the RTL information from\n\
+the previous iteration.",
+ rtx, (rtx data, void *prev, scalar_int_mode mode),
+ default_gen_memset_value)
+
 /* Return a new value for loop unroll size.  */
 DEFHOOK
 (loop_unroll_adjust,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 1947ef26fd6..b55e6ec6756 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -90,6 +90,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "attribs.h"
 #include "asan.h"
 #include "emit-rtl.h"
+#include "builtins.h"
 
 bool
 default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
@@ -2627,4 +2628,59 @@  default_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
   return untagged_base;
 }
 
+/* Default implementation of TARGET_READ_MEMSET_VALUE.  */
+
+rtx
+default_read_memset_value (const char *c, void *prevp,
+			   scalar_int_mode mode)
+{
+  by_pieces_prev *prev = (by_pieces_prev *) prevp;
+  if (prev != nullptr && prev->data != nullptr)
+    {
+      /* Use the previous data in the same mode.  */
+      if (prev->mode == mode)
+	return prev->data;
+    }
+
+  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
+
+  memset (p, *c, GET_MODE_SIZE (mode));
+
+  return c_readstr (p, mode);
+}
+
+/* Default implementation of TARGET_GEN_MEMSET_VALUE.  */
+
+rtx
+default_gen_memset_value (rtx data, void *prevp, scalar_int_mode mode)
+{
+  rtx target, coeff;
+  size_t size;
+  char *p;
+
+  by_pieces_prev *prev = (by_pieces_prev *) prevp;
+  if (prev != nullptr && prev->data != nullptr)
+    {
+      /* Use the previous data in the same mode.  */
+      if (prev->mode == mode)
+	return prev->data;
+
+      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
+      if (target != nullptr)
+	return target;
+    }
+
+  size = GET_MODE_SIZE (mode);
+  if (size == 1)
+    return data;
+
+  p = XALLOCAVEC (char, size);
+  memset (p, 1, size);
+  coeff = c_readstr (p, mode);
+
+  target = convert_to_mode (mode, data, 1);
+  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
+  return force_reg (mode, target);
+}
+
 #include "gt-targhooks.h"
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index b537038c0aa..3c00927e196 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -300,4 +300,8 @@  extern rtx default_memtag_set_tag (rtx, rtx, rtx);
 extern rtx default_memtag_extract_tag (rtx, rtx);
 extern rtx default_memtag_untagged_pointer (rtx, rtx);
 
+extern rtx default_read_memset_value (const char *, void *,
+				      scalar_int_mode);
+extern rtx default_gen_memset_value (rtx, void *, scalar_int_mode);
+
 #endif /* GCC_TARGHOOKS_H */