diff mbox series

[v2] x86: Properly find the maximum stack slot alignment

Message ID 20230707151336.691534-1-hjl.tools@gmail.com
State New
Headers show
Series [v2] x86: Properly find the maximum stack slot alignment | expand

Commit Message

H.J. Lu July 7, 2023, 3:13 p.m. UTC
Don't assume that stack slots can only be accessed by stack or frame
registers.  We first find all registers defined by stack or frame
registers.  Then check memory accesses by such registers, including
stack and frame registers.

gcc/

	PR target/109780
	* config/i386/i386.cc (ix86_update_stack_alignment): New.
	(ix86_find_all_reg_use): Likewise.
	(ix86_find_max_used_stack_alignment): Also check memory accesses
	from registers defined by stack or frame registers.

gcc/testsuite/

	PR target/109780
	* g++.target/i386/pr109780-1.C: New test.
	* gcc.target/i386/pr109780-1.c: Likewise.
	* gcc.target/i386/pr109780-2.c: Likewise.
---
 gcc/config/i386/i386.cc                    | 120 +++++++++++++++++----
 gcc/testsuite/g++.target/i386/pr109780-1.C |  72 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr109780-1.c |  14 +++
 gcc/testsuite/gcc.target/i386/pr109780-2.c |  21 ++++
 4 files changed, 206 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c

Comments

Richard Biener July 10, 2023, 10:32 a.m. UTC | #1
On Fri, Jul 7, 2023 at 5:14 PM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Don't assume that stack slots can only be accessed by stack or frame
> registers.  We first find all registers defined by stack or frame
> registers.  Then check memory accesses by such registers, including
> stack and frame registers.
>
> gcc/
>
>         PR target/109780
>         * config/i386/i386.cc (ix86_update_stack_alignment): New.
>         (ix86_find_all_reg_use): Likewise.
>         (ix86_find_max_used_stack_alignment): Also check memory accesses
>         from registers defined by stack or frame registers.
>
> gcc/testsuite/
>
>         PR target/109780
>         * g++.target/i386/pr109780-1.C: New test.
>         * gcc.target/i386/pr109780-1.c: Likewise.
>         * gcc.target/i386/pr109780-2.c: Likewise.
> ---
>  gcc/config/i386/i386.cc                    | 120 +++++++++++++++++----
>  gcc/testsuite/g++.target/i386/pr109780-1.C |  72 +++++++++++++
>  gcc/testsuite/gcc.target/i386/pr109780-1.c |  14 +++
>  gcc/testsuite/gcc.target/i386/pr109780-2.c |  21 ++++
>  4 files changed, 206 insertions(+), 21 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index caca74d6dec..27f349b0ccb 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -8084,6 +8084,63 @@ output_probe_stack_range (rtx reg, rtx end)
>    return "";
>  }
>
> +/* Update the maximum stack slot alignment from memory alignment in
> +   PAT.  */
> +
> +static void
> +ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
> +{
> +  /* This insn may reference stack slot.  Update the maximum stack slot
> +     alignment.  */
> +  subrtx_iterator::array_type array;
> +  FOR_EACH_SUBRTX (iter, array, pat, ALL)
> +    if (MEM_P (*iter))
> +      {
> +       unsigned int alignment = MEM_ALIGN (*iter);
> +       unsigned int *stack_alignment
> +         = (unsigned int *) data;
> +       if (alignment > *stack_alignment)
> +         *stack_alignment = alignment;
> +       break;
> +      }
> +}
> +
> +/* Find all registers defined with REG.  */
> +
> +static void
> +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, int reg)
> +{
> +  for (df_ref ref = DF_REG_USE_CHAIN (reg);
> +       ref != NULL;
> +       ref = DF_REF_NEXT_REG (ref))
> +    {
> +      if (DF_REF_IS_ARTIFICIAL (ref))
> +       continue;
> +
> +      rtx_insn *insn = DF_REF_INSN (ref);
> +      if (!NONDEBUG_INSN_P (insn))
> +       continue;
> +
> +      rtx set = single_set (insn);
> +      if (!set)
> +       continue;
> +
> +      rtx src = SET_SRC (set);
> +      if (MEM_P (src))
> +       continue;
> +
> +      rtx dest = SET_DEST (set);
> +      if (!REG_P (dest))
> +       continue;
> +
> +      if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
> +       continue;
> +
> +      /* Add this register to stack_slot_access.  */
> +      add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
> +    }
> +}
> +
>  /* Set stack_frame_required to false if stack frame isn't required.
>     Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
>     slot used if stack frame is required and CHECK_STACK_SLOT is true.  */
> @@ -8102,10 +8159,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
>    add_to_hard_reg_set (&set_up_by_prologue, Pmode,
>                        HARD_FRAME_POINTER_REGNUM);
>
> -  /* The preferred stack alignment is the minimum stack alignment.  */
> -  if (stack_alignment > crtl->preferred_stack_boundary)
> -    stack_alignment = crtl->preferred_stack_boundary;
> -
>    bool require_stack_frame = false;
>
>    FOR_EACH_BB_FN (bb, cfun)
> @@ -8117,27 +8170,52 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
>                                        set_up_by_prologue))
>           {
>             require_stack_frame = true;
> -
> -           if (check_stack_slot)
> -             {
> -               /* Find the maximum stack alignment.  */
> -               subrtx_iterator::array_type array;
> -               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> -                 if (MEM_P (*iter)
> -                     && (reg_mentioned_p (stack_pointer_rtx,
> -                                          *iter)
> -                         || reg_mentioned_p (frame_pointer_rtx,
> -                                             *iter)))
> -                   {
> -                     unsigned int alignment = MEM_ALIGN (*iter);
> -                     if (alignment > stack_alignment)
> -                       stack_alignment = alignment;
> -                   }
> -             }
> +           break;
>           }
>      }
>
>    cfun->machine->stack_frame_required = require_stack_frame;
> +
> +  /* Stop if we don't need to check stack slot.  */
> +  if (!check_stack_slot)
> +    return;
> +
> +  /* The preferred stack alignment is the minimum stack alignment.  */
> +  if (stack_alignment > crtl->preferred_stack_boundary)
> +    stack_alignment = crtl->preferred_stack_boundary;
> +
> +  HARD_REG_SET stack_slot_access;
> +  CLEAR_HARD_REG_SET (stack_slot_access);
> +
> +  /* Stack slot can be accessed by stack pointer, frame pointer or
> +     registers defined by stack pointer or frame pointer.  */
> +  add_to_hard_reg_set (&stack_slot_access, Pmode,
> +                      STACK_POINTER_REGNUM);

I'm possibly missing the point, but wasn't the purpose of the change to detect
"indirect" uses of the stack pointer?  This now handles two levels, thus
*sp and tem = sp + 4; *tem, but it doesn't seem to handle tem2 = tem + 4;
*tem2?  The previous patch did I think, but its compile-time complexity wasn't
sound.

I think the ix86_find_all_reg_use wants to wrap itself inside sth like

  auto_bitmap worklist;

  bitmap_set_bit (worklist, reg);
  do
     {
        int reg = bitmap_clear_first_set_bit (worklist);
        /* it's current processing of reg */
        when it adds to stack_slot_access also add to worklist
     }
  while (!bitmap_empty_p (worklist));

> +  ix86_find_all_reg_use (stack_slot_access, STACK_POINTER_REGNUM);
> +  if (frame_pointer_needed)
> +    {
> +      add_to_hard_reg_set (&stack_slot_access, Pmode,
> +                          HARD_FRAME_POINTER_REGNUM);
> +      ix86_find_all_reg_use (stack_slot_access,
> +                            HARD_FRAME_POINTER_REGNUM);
> +    }
> +
> +  for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> +    if (GENERAL_REGNO_P (i)
> +       && TEST_HARD_REG_BIT (stack_slot_access, i))

You can as well iterate over the stack_slot_access regset here?

> +      for (df_ref ref = DF_REG_USE_CHAIN (i);
> +          ref != NULL;
> +          ref = DF_REF_NEXT_REG (ref))
> +       {
> +         if (DF_REF_IS_ARTIFICIAL (ref))
> +           continue;
> +
> +         rtx_insn *insn = DF_REF_INSN (ref);
> +         if (!NONDEBUG_INSN_P (insn))
> +           continue;
> +         note_stores (insn, ix86_update_stack_alignment,
> +                      &stack_alignment);
> +       }
>  }
>
>  /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C
> new file mode 100644
> index 00000000000..7e3eabdec94
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C
> @@ -0,0 +1,72 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target c++17 } */
> +/* { dg-options "-O2 -mavx2 -mtune=haswell" } */
> +
> +template <typename _Tp> struct remove_reference {
> +  using type = __remove_reference(_Tp);
> +};
> +template <typename T> struct MaybeStorageBase {
> +  T val;
> +  struct Union {
> +    ~Union();
> +  } mStorage;
> +};
> +template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
> +  char mIsSome;
> +};
> +template <typename T, typename U = typename remove_reference<T>::type>
> +constexpr MaybeStorage<U> Some(T &&);
> +template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
> +  return {aValue};
> +}
> +template <class> struct Span {
> +  int operator[](long idx) {
> +    int *__trans_tmp_4;
> +    if (__builtin_expect(idx, 0))
> +      *(int *)__null = false;
> +    __trans_tmp_4 = storage_.data();
> +    return __trans_tmp_4[idx];
> +  }
> +  struct {
> +    int *data() { return data_; }
> +    int *data_;
> +  } storage_;
> +};
> +struct Variant {
> +  template <typename RefT> Variant(RefT) {}
> +};
> +long from_i, from___trans_tmp_9;
> +namespace js::intl {
> +struct DecimalNumber {
> +  Variant string_;
> +  unsigned long significandStart_;
> +  unsigned long significandEnd_;
> +  bool zero_ = false;
> +  bool negative_;
> +  template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
> +  template <typename CharT>
> +  static MaybeStorage<DecimalNumber> from(Span<const CharT>);
> +  void from();
> +};
> +} // namespace js::intl
> +void js::intl::DecimalNumber::from() {
> +  Span<const char16_t> __trans_tmp_3;
> +  from(__trans_tmp_3);
> +}
> +template <typename CharT>
> +MaybeStorage<js::intl::DecimalNumber>
> +js::intl::DecimalNumber::from(Span<const CharT> chars) {
> +  DecimalNumber number(chars);
> +  if (auto ch = chars[from_i]) {
> +    from_i++;
> +    number.negative_ = ch == '-';
> +  }
> +  while (from___trans_tmp_9 && chars[from_i])
> +    ;
> +  if (chars[from_i])
> +    while (chars[from_i - 1])
> +      number.zero_ = true;
> +  return Some(number);
> +}
> +
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c
> new file mode 100644
> index 00000000000..6b06947f2a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=skylake" } */
> +
> +char perm[64];
> +
> +void
> +__attribute__((noipa))
> +foo (int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    perm[i] = i;
> +}
> +
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c
> new file mode 100644
> index 00000000000..152da06c6ad
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=skylake" } */
> +
> +#define N 9
> +
> +void
> +f (double x, double y, double *res)
> +{
> +  y = -y;
> +  for (int i = 0; i < N; ++i)
> +    {
> +      double tmp = y;
> +      y = x;
> +      x = tmp;
> +      res[i] = i;
> +    }
> +  res[N] = y * y;
> +  res[N + 1] = x;
> +}
> +
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> --
> 2.41.0
>
H.J. Lu July 24, 2023, 6:12 p.m. UTC | #2
On Mon, Jul 10, 2023 at 3:32 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Fri, Jul 7, 2023 at 5:14 PM H.J. Lu via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Don't assume that stack slots can only be accessed by stack or frame
> > registers.  We first find all registers defined by stack or frame
> > registers.  Then check memory accesses by such registers, including
> > stack and frame registers.
> >
> > gcc/
> >
> >         PR target/109780
> >         * config/i386/i386.cc (ix86_update_stack_alignment): New.
> >         (ix86_find_all_reg_use): Likewise.
> >         (ix86_find_max_used_stack_alignment): Also check memory accesses
> >         from registers defined by stack or frame registers.
> >
> > gcc/testsuite/
> >
> >         PR target/109780
> >         * g++.target/i386/pr109780-1.C: New test.
> >         * gcc.target/i386/pr109780-1.c: Likewise.
> >         * gcc.target/i386/pr109780-2.c: Likewise.
> > ---
> >  gcc/config/i386/i386.cc                    | 120 +++++++++++++++++----
> >  gcc/testsuite/g++.target/i386/pr109780-1.C |  72 +++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr109780-1.c |  14 +++
> >  gcc/testsuite/gcc.target/i386/pr109780-2.c |  21 ++++
> >  4 files changed, 206 insertions(+), 21 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index caca74d6dec..27f349b0ccb 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -8084,6 +8084,63 @@ output_probe_stack_range (rtx reg, rtx end)
> >    return "";
> >  }
> >
> > +/* Update the maximum stack slot alignment from memory alignment in
> > +   PAT.  */
> > +
> > +static void
> > +ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
> > +{
> > +  /* This insn may reference stack slot.  Update the maximum stack slot
> > +     alignment.  */
> > +  subrtx_iterator::array_type array;
> > +  FOR_EACH_SUBRTX (iter, array, pat, ALL)
> > +    if (MEM_P (*iter))
> > +      {
> > +       unsigned int alignment = MEM_ALIGN (*iter);
> > +       unsigned int *stack_alignment
> > +         = (unsigned int *) data;
> > +       if (alignment > *stack_alignment)
> > +         *stack_alignment = alignment;
> > +       break;
> > +      }
> > +}
> > +
> > +/* Find all registers defined with REG.  */
> > +
> > +static void
> > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, int reg)
> > +{
> > +  for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > +       ref != NULL;
> > +       ref = DF_REF_NEXT_REG (ref))
> > +    {
> > +      if (DF_REF_IS_ARTIFICIAL (ref))
> > +       continue;
> > +
> > +      rtx_insn *insn = DF_REF_INSN (ref);
> > +      if (!NONDEBUG_INSN_P (insn))
> > +       continue;
> > +
> > +      rtx set = single_set (insn);
> > +      if (!set)
> > +       continue;
> > +
> > +      rtx src = SET_SRC (set);
> > +      if (MEM_P (src))
> > +       continue;
> > +
> > +      rtx dest = SET_DEST (set);
> > +      if (!REG_P (dest))
> > +       continue;
> > +
> > +      if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
> > +       continue;
> > +
> > +      /* Add this register to stack_slot_access.  */
> > +      add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
> > +    }
> > +}
> > +
> >  /* Set stack_frame_required to false if stack frame isn't required.
> >     Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
> >     slot used if stack frame is required and CHECK_STACK_SLOT is true.  */
> > @@ -8102,10 +8159,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> >    add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> >                        HARD_FRAME_POINTER_REGNUM);
> >
> > -  /* The preferred stack alignment is the minimum stack alignment.  */
> > -  if (stack_alignment > crtl->preferred_stack_boundary)
> > -    stack_alignment = crtl->preferred_stack_boundary;
> > -
> >    bool require_stack_frame = false;
> >
> >    FOR_EACH_BB_FN (bb, cfun)
> > @@ -8117,27 +8170,52 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> >                                        set_up_by_prologue))
> >           {
> >             require_stack_frame = true;
> > -
> > -           if (check_stack_slot)
> > -             {
> > -               /* Find the maximum stack alignment.  */
> > -               subrtx_iterator::array_type array;
> > -               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> > -                 if (MEM_P (*iter)
> > -                     && (reg_mentioned_p (stack_pointer_rtx,
> > -                                          *iter)
> > -                         || reg_mentioned_p (frame_pointer_rtx,
> > -                                             *iter)))
> > -                   {
> > -                     unsigned int alignment = MEM_ALIGN (*iter);
> > -                     if (alignment > stack_alignment)
> > -                       stack_alignment = alignment;
> > -                   }
> > -             }
> > +           break;
> >           }
> >      }
> >
> >    cfun->machine->stack_frame_required = require_stack_frame;
> > +
> > +  /* Stop if we don't need to check stack slot.  */
> > +  if (!check_stack_slot)
> > +    return;
> > +
> > +  /* The preferred stack alignment is the minimum stack alignment.  */
> > +  if (stack_alignment > crtl->preferred_stack_boundary)
> > +    stack_alignment = crtl->preferred_stack_boundary;
> > +
> > +  HARD_REG_SET stack_slot_access;
> > +  CLEAR_HARD_REG_SET (stack_slot_access);
> > +
> > +  /* Stack slot can be accessed by stack pointer, frame pointer or
> > +     registers defined by stack pointer or frame pointer.  */
> > +  add_to_hard_reg_set (&stack_slot_access, Pmode,
> > +                      STACK_POINTER_REGNUM);
>
> I'm possibly missing the point, but wasn't the purpose of the change to detect
> "indirect" uses of the stack pointer?  This now handles two levels, thus
> *sp and tem = sp + 4; *tem, but it doesn't seem to handle tem2 = tem + 4;
> *tem2?  The previous patch did I think, but its compile-time complexity wasn't
> sound.
>
> I think the ix86_find_all_reg_use wants to wrap itself inside sth like
>
>   auto_bitmap worklist;
>
>   bitmap_set_bit (worklist, reg);
>   do
>      {
>         int reg = bitmap_clear_first_set_bit (worklist);
>         /* it's current processing of reg */
>         when it adds to stack_slot_access also add to worklist
>      }
>   while (!bitmap_empty_p (worklist));

Will fix.

> > +  ix86_find_all_reg_use (stack_slot_access, STACK_POINTER_REGNUM);
> > +  if (frame_pointer_needed)
> > +    {
> > +      add_to_hard_reg_set (&stack_slot_access, Pmode,
> > +                          HARD_FRAME_POINTER_REGNUM);
> > +      ix86_find_all_reg_use (stack_slot_access,
> > +                            HARD_FRAME_POINTER_REGNUM);
> > +    }
> > +
> > +  for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> > +    if (GENERAL_REGNO_P (i)
> > +       && TEST_HARD_REG_BIT (stack_slot_access, i))
>
> You can as well iterate over the stack_slot_access regset here?

Will fix.

Thanks.

> > +      for (df_ref ref = DF_REG_USE_CHAIN (i);
> > +          ref != NULL;
> > +          ref = DF_REF_NEXT_REG (ref))
> > +       {
> > +         if (DF_REF_IS_ARTIFICIAL (ref))
> > +           continue;
> > +
> > +         rtx_insn *insn = DF_REF_INSN (ref);
> > +         if (!NONDEBUG_INSN_P (insn))
> > +           continue;
> > +         note_stores (insn, ix86_update_stack_alignment,
> > +                      &stack_alignment);
> > +       }
> >  }
> >
> >  /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> > diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C
> > new file mode 100644
> > index 00000000000..7e3eabdec94
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C
> > @@ -0,0 +1,72 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target c++17 } */
> > +/* { dg-options "-O2 -mavx2 -mtune=haswell" } */
> > +
> > +template <typename _Tp> struct remove_reference {
> > +  using type = __remove_reference(_Tp);
> > +};
> > +template <typename T> struct MaybeStorageBase {
> > +  T val;
> > +  struct Union {
> > +    ~Union();
> > +  } mStorage;
> > +};
> > +template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
> > +  char mIsSome;
> > +};
> > +template <typename T, typename U = typename remove_reference<T>::type>
> > +constexpr MaybeStorage<U> Some(T &&);
> > +template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
> > +  return {aValue};
> > +}
> > +template <class> struct Span {
> > +  int operator[](long idx) {
> > +    int *__trans_tmp_4;
> > +    if (__builtin_expect(idx, 0))
> > +      *(int *)__null = false;
> > +    __trans_tmp_4 = storage_.data();
> > +    return __trans_tmp_4[idx];
> > +  }
> > +  struct {
> > +    int *data() { return data_; }
> > +    int *data_;
> > +  } storage_;
> > +};
> > +struct Variant {
> > +  template <typename RefT> Variant(RefT) {}
> > +};
> > +long from_i, from___trans_tmp_9;
> > +namespace js::intl {
> > +struct DecimalNumber {
> > +  Variant string_;
> > +  unsigned long significandStart_;
> > +  unsigned long significandEnd_;
> > +  bool zero_ = false;
> > +  bool negative_;
> > +  template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
> > +  template <typename CharT>
> > +  static MaybeStorage<DecimalNumber> from(Span<const CharT>);
> > +  void from();
> > +};
> > +} // namespace js::intl
> > +void js::intl::DecimalNumber::from() {
> > +  Span<const char16_t> __trans_tmp_3;
> > +  from(__trans_tmp_3);
> > +}
> > +template <typename CharT>
> > +MaybeStorage<js::intl::DecimalNumber>
> > +js::intl::DecimalNumber::from(Span<const CharT> chars) {
> > +  DecimalNumber number(chars);
> > +  if (auto ch = chars[from_i]) {
> > +    from_i++;
> > +    number.negative_ = ch == '-';
> > +  }
> > +  while (from___trans_tmp_9 && chars[from_i])
> > +    ;
> > +  if (chars[from_i])
> > +    while (chars[from_i - 1])
> > +      number.zero_ = true;
> > +  return Some(number);
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c
> > new file mode 100644
> > index 00000000000..6b06947f2a5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -march=skylake" } */
> > +
> > +char perm[64];
> > +
> > +void
> > +__attribute__((noipa))
> > +foo (int n)
> > +{
> > +  for (int i = 0; i < n; ++i)
> > +    perm[i] = i;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c
> > new file mode 100644
> > index 00000000000..152da06c6ad
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c
> > @@ -0,0 +1,21 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -march=skylake" } */
> > +
> > +#define N 9
> > +
> > +void
> > +f (double x, double y, double *res)
> > +{
> > +  y = -y;
> > +  for (int i = 0; i < N; ++i)
> > +    {
> > +      double tmp = y;
> > +      y = x;
> > +      x = tmp;
> > +      res[i] = i;
> > +    }
> > +  res[N] = y * y;
> > +  res[N + 1] = x;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
> > --
> > 2.41.0
> >
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index caca74d6dec..27f349b0ccb 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -8084,6 +8084,63 @@  output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
+/* Update the maximum stack slot alignment from memory alignment in
+   PAT.  */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+  /* This insn may reference stack slot.  Update the maximum stack slot
+     alignment.  */
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, pat, ALL)
+    if (MEM_P (*iter))
+      {
+	unsigned int alignment = MEM_ALIGN (*iter);
+	unsigned int *stack_alignment
+	  = (unsigned int *) data;
+	if (alignment > *stack_alignment)
+	  *stack_alignment = alignment;
+	break;
+      }
+}
+
+/* Find all registers defined with REG.  */
+
+static void
+ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, int reg)
+{
+  for (df_ref ref = DF_REG_USE_CHAIN (reg);
+       ref != NULL;
+       ref = DF_REF_NEXT_REG (ref))
+    {
+      if (DF_REF_IS_ARTIFICIAL (ref))
+	continue;
+
+      rtx_insn *insn = DF_REF_INSN (ref);
+      if (!NONDEBUG_INSN_P (insn))
+	continue;
+
+      rtx set = single_set (insn);
+      if (!set)
+	continue;
+
+      rtx src = SET_SRC (set);
+      if (MEM_P (src))
+	continue;
+
+      rtx dest = SET_DEST (set);
+      if (!REG_P (dest))
+	continue;
+
+      if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
+	continue;
+
+      /* Add this register to stack_slot_access.  */
+      add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
+    }
+}
+
 /* Set stack_frame_required to false if stack frame isn't required.
    Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
    slot used if stack frame is required and CHECK_STACK_SLOT is true.  */
@@ -8102,10 +8159,6 @@  ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
   add_to_hard_reg_set (&set_up_by_prologue, Pmode,
 		       HARD_FRAME_POINTER_REGNUM);
 
-  /* The preferred stack alignment is the minimum stack alignment.  */
-  if (stack_alignment > crtl->preferred_stack_boundary)
-    stack_alignment = crtl->preferred_stack_boundary;
-
   bool require_stack_frame = false;
 
   FOR_EACH_BB_FN (bb, cfun)
@@ -8117,27 +8170,52 @@  ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
 				       set_up_by_prologue))
 	  {
 	    require_stack_frame = true;
-
-	    if (check_stack_slot)
-	      {
-		/* Find the maximum stack alignment.  */
-		subrtx_iterator::array_type array;
-		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
-		  if (MEM_P (*iter)
-		      && (reg_mentioned_p (stack_pointer_rtx,
-					   *iter)
-			  || reg_mentioned_p (frame_pointer_rtx,
-					      *iter)))
-		    {
-		      unsigned int alignment = MEM_ALIGN (*iter);
-		      if (alignment > stack_alignment)
-			stack_alignment = alignment;
-		    }
-	      }
+	    break;
 	  }
     }
 
   cfun->machine->stack_frame_required = require_stack_frame;
+
+  /* Stop if we don't need to check stack slot.  */
+  if (!check_stack_slot)
+    return;
+
+  /* The preferred stack alignment is the minimum stack alignment.  */
+  if (stack_alignment > crtl->preferred_stack_boundary)
+    stack_alignment = crtl->preferred_stack_boundary;
+
+  HARD_REG_SET stack_slot_access;
+  CLEAR_HARD_REG_SET (stack_slot_access);
+
+  /* Stack slot can be accessed by stack pointer, frame pointer or
+     registers defined by stack pointer or frame pointer.  */
+  add_to_hard_reg_set (&stack_slot_access, Pmode,
+		       STACK_POINTER_REGNUM);
+  ix86_find_all_reg_use (stack_slot_access, STACK_POINTER_REGNUM);
+  if (frame_pointer_needed)
+    {
+      add_to_hard_reg_set (&stack_slot_access, Pmode,
+			   HARD_FRAME_POINTER_REGNUM);
+      ix86_find_all_reg_use (stack_slot_access,
+			     HARD_FRAME_POINTER_REGNUM);
+    }
+
+  for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    if (GENERAL_REGNO_P (i)
+	&& TEST_HARD_REG_BIT (stack_slot_access, i))
+      for (df_ref ref = DF_REG_USE_CHAIN (i);
+	   ref != NULL;
+	   ref = DF_REF_NEXT_REG (ref))
+	{
+	  if (DF_REF_IS_ARTIFICIAL (ref))
+	    continue;
+
+	  rtx_insn *insn = DF_REF_INSN (ref);
+	  if (!NONDEBUG_INSN_P (insn))
+	    continue;
+	  note_stores (insn, ix86_update_stack_alignment,
+		       &stack_alignment);
+	}
 }
 
 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C
new file mode 100644
index 00000000000..7e3eabdec94
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr109780-1.C
@@ -0,0 +1,72 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target c++17 } */
+/* { dg-options "-O2 -mavx2 -mtune=haswell" } */
+
+template <typename _Tp> struct remove_reference {
+  using type = __remove_reference(_Tp);
+};
+template <typename T> struct MaybeStorageBase {
+  T val;
+  struct Union {
+    ~Union();
+  } mStorage;
+};
+template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
+  char mIsSome;
+};
+template <typename T, typename U = typename remove_reference<T>::type>
+constexpr MaybeStorage<U> Some(T &&);
+template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
+  return {aValue};
+}
+template <class> struct Span {
+  int operator[](long idx) {
+    int *__trans_tmp_4;
+    if (__builtin_expect(idx, 0))
+      *(int *)__null = false;
+    __trans_tmp_4 = storage_.data();
+    return __trans_tmp_4[idx];
+  }
+  struct {
+    int *data() { return data_; }
+    int *data_;
+  } storage_;
+};
+struct Variant {
+  template <typename RefT> Variant(RefT) {}
+};
+long from_i, from___trans_tmp_9;
+namespace js::intl {
+struct DecimalNumber {
+  Variant string_;
+  unsigned long significandStart_;
+  unsigned long significandEnd_;
+  bool zero_ = false;
+  bool negative_;
+  template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
+  template <typename CharT>
+  static MaybeStorage<DecimalNumber> from(Span<const CharT>);
+  void from();
+};
+} // namespace js::intl
+void js::intl::DecimalNumber::from() {
+  Span<const char16_t> __trans_tmp_3;
+  from(__trans_tmp_3);
+}
+template <typename CharT>
+MaybeStorage<js::intl::DecimalNumber>
+js::intl::DecimalNumber::from(Span<const CharT> chars) {
+  DecimalNumber number(chars);
+  if (auto ch = chars[from_i]) {
+    from_i++;
+    number.negative_ = ch == '-';
+  }
+  while (from___trans_tmp_9 && chars[from_i])
+    ;
+  if (chars[from_i])
+    while (chars[from_i - 1])
+      number.zero_ = true;
+  return Some(number);
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c
new file mode 100644
index 00000000000..6b06947f2a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake" } */
+
+char perm[64];
+
+void
+__attribute__((noipa))
+foo (int n)
+{
+  for (int i = 0; i < n; ++i)
+    perm[i] = i;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c
new file mode 100644
index 00000000000..152da06c6ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake" } */
+
+#define N 9
+
+void
+f (double x, double y, double *res)
+{
+  y = -y;
+  for (int i = 0; i < N; ++i)
+    {
+      double tmp = y;
+      y = x;
+      x = tmp;
+      res[i] = i;
+    }
+  res[N] = y * y;
+  res[N + 1] = x;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */