Message ID | 20230707151336.691534-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v2] x86: Properly find the maximum stack slot alignment | expand |
On Fri, Jul 7, 2023 at 5:14 PM H.J. Lu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Don't assume that stack slots can only be accessed by stack or frame > registers. We first find all registers defined by stack or frame > registers. Then check memory accesses by such registers, including > stack and frame registers. > > gcc/ > > PR target/109780 > * config/i386/i386.cc (ix86_update_stack_alignment): New. > (ix86_find_all_reg_use): Likewise. > (ix86_find_max_used_stack_alignment): Also check memory accesses > from registers defined by stack or frame registers. > > gcc/testsuite/ > > PR target/109780 > * g++.target/i386/pr109780-1.C: New test. > * gcc.target/i386/pr109780-1.c: Likewise. > * gcc.target/i386/pr109780-2.c: Likewise. > --- > gcc/config/i386/i386.cc | 120 +++++++++++++++++---- > gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++++++ > gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 +++ > gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 ++++ > 4 files changed, 206 insertions(+), 21 deletions(-) > create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index caca74d6dec..27f349b0ccb 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -8084,6 +8084,63 @@ output_probe_stack_range (rtx reg, rtx end) > return ""; > } > > +/* Update the maximum stack slot alignment from memory alignment in > + PAT. */ > + > +static void > +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) > +{ > + /* This insn may reference stack slot. Update the maximum stack slot > + alignment. */ > + subrtx_iterator::array_type array; > + FOR_EACH_SUBRTX (iter, array, pat, ALL) > + if (MEM_P (*iter)) > + { > + unsigned int alignment = MEM_ALIGN (*iter); > + unsigned int *stack_alignment > + = (unsigned int *) data; > + if (alignment > *stack_alignment) > + *stack_alignment = alignment; > + break; > + } > +} > + > +/* Find all registers defined with REG. */ > + > +static void > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, int reg) > +{ > + for (df_ref ref = DF_REG_USE_CHAIN (reg); > + ref != NULL; > + ref = DF_REF_NEXT_REG (ref)) > + { > + if (DF_REF_IS_ARTIFICIAL (ref)) > + continue; > + > + rtx_insn *insn = DF_REF_INSN (ref); > + if (!NONDEBUG_INSN_P (insn)) > + continue; > + > + rtx set = single_set (insn); > + if (!set) > + continue; > + > + rtx src = SET_SRC (set); > + if (MEM_P (src)) > + continue; > + > + rtx dest = SET_DEST (set); > + if (!REG_P (dest)) > + continue; > + > + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest))) > + continue; > + > + /* Add this register to stack_slot_access. */ > + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest)); > + } > +} > + > /* Set stack_frame_required to false if stack frame isn't required. > Update STACK_ALIGNMENT to the largest alignment, in bits, of stack > slot used if stack frame is required and CHECK_STACK_SLOT is true. */ > @@ -8102,10 +8159,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, > add_to_hard_reg_set (&set_up_by_prologue, Pmode, > HARD_FRAME_POINTER_REGNUM); > > - /* The preferred stack alignment is the minimum stack alignment. */ > - if (stack_alignment > crtl->preferred_stack_boundary) > - stack_alignment = crtl->preferred_stack_boundary; > - > bool require_stack_frame = false; > > FOR_EACH_BB_FN (bb, cfun) > @@ -8117,27 +8170,52 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, > set_up_by_prologue)) > { > require_stack_frame = true; > - > - if (check_stack_slot) > - { > - /* Find the maximum stack alignment. */ > - subrtx_iterator::array_type array; > - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) > - if (MEM_P (*iter) > - && (reg_mentioned_p (stack_pointer_rtx, > - *iter) > - || reg_mentioned_p (frame_pointer_rtx, > - *iter))) > - { > - unsigned int alignment = MEM_ALIGN (*iter); > - if (alignment > stack_alignment) > - stack_alignment = alignment; > - } > - } > + break; > } > } > > cfun->machine->stack_frame_required = require_stack_frame; > + > + /* Stop if we don't need to check stack slot. */ > + if (!check_stack_slot) > + return; > + > + /* The preferred stack alignment is the minimum stack alignment. */ > + if (stack_alignment > crtl->preferred_stack_boundary) > + stack_alignment = crtl->preferred_stack_boundary; > + > + HARD_REG_SET stack_slot_access; > + CLEAR_HARD_REG_SET (stack_slot_access); > + > + /* Stack slot can be accessed by stack pointer, frame pointer or > + registers defined by stack pointer or frame pointer. */ > + add_to_hard_reg_set (&stack_slot_access, Pmode, > + STACK_POINTER_REGNUM); I'm possibly missing the point, but wasn't the purpose of the change to detect "indirect" uses of the stack pointer? This now handles two levels, thus *sp and tem = sp + 4; *tem, but it doesn't seem to handle tem2 = tem + 4; *tem2? The previous patch did I think, but its compile-time complexity wasn't sound. I think the ix86_find_all_reg_use wants to wrap itself inside sth like auto_bitmap worklist; bitmap_set_bit (worklist, reg); do { int reg = bitmap_clear_first_set_bit (worklist); /* it's current processing of reg */ when it adds to stack_slot_access also add to worklist } while (!bitmap_empty_p (worklist)); > + ix86_find_all_reg_use (stack_slot_access, STACK_POINTER_REGNUM); > + if (frame_pointer_needed) > + { > + add_to_hard_reg_set (&stack_slot_access, Pmode, > + HARD_FRAME_POINTER_REGNUM); > + ix86_find_all_reg_use (stack_slot_access, > + HARD_FRAME_POINTER_REGNUM); > + } > + > + for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) > + if (GENERAL_REGNO_P (i) > + && TEST_HARD_REG_BIT (stack_slot_access, i)) You can as well iterate over the stack_slot_access regset here? > + for (df_ref ref = DF_REG_USE_CHAIN (i); > + ref != NULL; > + ref = DF_REF_NEXT_REG (ref)) > + { > + if (DF_REF_IS_ARTIFICIAL (ref)) > + continue; > + > + rtx_insn *insn = DF_REF_INSN (ref); > + if (!NONDEBUG_INSN_P (insn)) > + continue; > + note_stores (insn, ix86_update_stack_alignment, > + &stack_alignment); > + } > } > > /* Finalize stack_realign_needed and frame_pointer_needed flags, which > diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C > new file mode 100644 > index 00000000000..7e3eabdec94 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C > @@ -0,0 +1,72 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target c++17 } */ > +/* { dg-options "-O2 -mavx2 -mtune=haswell" } */ > + > +template <typename _Tp> struct remove_reference { > + using type = __remove_reference(_Tp); > +}; > +template <typename T> struct MaybeStorageBase { > + T val; > + struct Union { > + ~Union(); > + } mStorage; > +}; > +template <typename T> struct MaybeStorage : MaybeStorageBase<T> { > + char mIsSome; > +}; > +template <typename T, typename U = typename remove_reference<T>::type> > +constexpr MaybeStorage<U> Some(T &&); > +template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) { > + return {aValue}; > +} > +template <class> struct Span { > + int operator[](long idx) { > + int *__trans_tmp_4; > + if (__builtin_expect(idx, 0)) > + *(int *)__null = false; > + __trans_tmp_4 = storage_.data(); > + return __trans_tmp_4[idx]; > + } > + struct { > + int *data() { return data_; } > + int *data_; > + } storage_; > +}; > +struct Variant { > + template <typename RefT> Variant(RefT) {} > +}; > +long from_i, from___trans_tmp_9; > +namespace js::intl { > +struct DecimalNumber { > + Variant string_; > + unsigned long significandStart_; > + unsigned long significandEnd_; > + bool zero_ = false; > + bool negative_; > + template <typename CharT> DecimalNumber(CharT string) : string_(string) {} > + template <typename CharT> > + static MaybeStorage<DecimalNumber> from(Span<const CharT>); > + void from(); > +}; > +} // namespace js::intl > +void js::intl::DecimalNumber::from() { > + Span<const char16_t> __trans_tmp_3; > + from(__trans_tmp_3); > +} > +template <typename CharT> > +MaybeStorage<js::intl::DecimalNumber> > +js::intl::DecimalNumber::from(Span<const CharT> chars) { > + DecimalNumber number(chars); > + if (auto ch = chars[from_i]) { > + from_i++; > + number.negative_ = ch == '-'; > + } > + while (from___trans_tmp_9 && chars[from_i]) > + ; > + if (chars[from_i]) > + while (chars[from_i - 1]) > + number.zero_ = true; > + return Some(number); > +} > + > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c > new file mode 100644 > index 00000000000..6b06947f2a5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=skylake" } */ > + > +char perm[64]; > + > +void > +__attribute__((noipa)) > +foo (int n) > +{ > + for (int i = 0; i < n; ++i) > + perm[i] = i; > +} > + > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c > new file mode 100644 > index 00000000000..152da06c6ad > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c > @@ -0,0 +1,21 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=skylake" } */ > + > +#define N 9 > + > +void > +f (double x, double y, double *res) > +{ > + y = -y; > + for (int i = 0; i < N; ++i) > + { > + double tmp = y; > + y = x; > + x = tmp; > + res[i] = i; > + } > + res[N] = y * y; > + res[N + 1] = x; > +} > + > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ > -- > 2.41.0 >
On Mon, Jul 10, 2023 at 3:32 AM Richard Biener <richard.guenther@gmail.com> wrote: > > On Fri, Jul 7, 2023 at 5:14 PM H.J. Lu via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Don't assume that stack slots can only be accessed by stack or frame > > registers. We first find all registers defined by stack or frame > > registers. Then check memory accesses by such registers, including > > stack and frame registers. > > > > gcc/ > > > > PR target/109780 > > * config/i386/i386.cc (ix86_update_stack_alignment): New. > > (ix86_find_all_reg_use): Likewise. > > (ix86_find_max_used_stack_alignment): Also check memory accesses > > from registers defined by stack or frame registers. > > > > gcc/testsuite/ > > > > PR target/109780 > > * g++.target/i386/pr109780-1.C: New test. > > * gcc.target/i386/pr109780-1.c: Likewise. > > * gcc.target/i386/pr109780-2.c: Likewise. > > --- > > gcc/config/i386/i386.cc | 120 +++++++++++++++++---- > > gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++++++ > > gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 +++ > > gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 ++++ > > 4 files changed, 206 insertions(+), 21 deletions(-) > > create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C > > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c > > > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > > index caca74d6dec..27f349b0ccb 100644 > > --- a/gcc/config/i386/i386.cc > > +++ b/gcc/config/i386/i386.cc > > @@ -8084,6 +8084,63 @@ output_probe_stack_range (rtx reg, rtx end) > > return ""; > > } > > > > +/* Update the maximum stack slot alignment from memory alignment in > > + PAT. */ > > + > > +static void > > +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) > > +{ > > + /* This insn may reference stack slot. Update the maximum stack slot > > + alignment. */ > > + subrtx_iterator::array_type array; > > + FOR_EACH_SUBRTX (iter, array, pat, ALL) > > + if (MEM_P (*iter)) > > + { > > + unsigned int alignment = MEM_ALIGN (*iter); > > + unsigned int *stack_alignment > > + = (unsigned int *) data; > > + if (alignment > *stack_alignment) > > + *stack_alignment = alignment; > > + break; > > + } > > +} > > + > > +/* Find all registers defined with REG. */ > > + > > +static void > > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, int reg) > > +{ > > + for (df_ref ref = DF_REG_USE_CHAIN (reg); > > + ref != NULL; > > + ref = DF_REF_NEXT_REG (ref)) > > + { > > + if (DF_REF_IS_ARTIFICIAL (ref)) > > + continue; > > + > > + rtx_insn *insn = DF_REF_INSN (ref); > > + if (!NONDEBUG_INSN_P (insn)) > > + continue; > > + > > + rtx set = single_set (insn); > > + if (!set) > > + continue; > > + > > + rtx src = SET_SRC (set); > > + if (MEM_P (src)) > > + continue; > > + > > + rtx dest = SET_DEST (set); > > + if (!REG_P (dest)) > > + continue; > > + > > + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest))) > > + continue; > > + > > + /* Add this register to stack_slot_access. */ > > + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest)); > > + } > > +} > > + > > /* Set stack_frame_required to false if stack frame isn't required. > > Update STACK_ALIGNMENT to the largest alignment, in bits, of stack > > slot used if stack frame is required and CHECK_STACK_SLOT is true. */ > > @@ -8102,10 +8159,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, > > add_to_hard_reg_set (&set_up_by_prologue, Pmode, > > HARD_FRAME_POINTER_REGNUM); > > > > - /* The preferred stack alignment is the minimum stack alignment. */ > > - if (stack_alignment > crtl->preferred_stack_boundary) > > - stack_alignment = crtl->preferred_stack_boundary; > > - > > bool require_stack_frame = false; > > > > FOR_EACH_BB_FN (bb, cfun) > > @@ -8117,27 +8170,52 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, > > set_up_by_prologue)) > > { > > require_stack_frame = true; > > - > > - if (check_stack_slot) > > - { > > - /* Find the maximum stack alignment. */ > > - subrtx_iterator::array_type array; > > - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) > > - if (MEM_P (*iter) > > - && (reg_mentioned_p (stack_pointer_rtx, > > - *iter) > > - || reg_mentioned_p (frame_pointer_rtx, > > - *iter))) > > - { > > - unsigned int alignment = MEM_ALIGN (*iter); > > - if (alignment > stack_alignment) > > - stack_alignment = alignment; > > - } > > - } > > + break; > > } > > } > > > > cfun->machine->stack_frame_required = require_stack_frame; > > + > > + /* Stop if we don't need to check stack slot. */ > > + if (!check_stack_slot) > > + return; > > + > > + /* The preferred stack alignment is the minimum stack alignment. */ > > + if (stack_alignment > crtl->preferred_stack_boundary) > > + stack_alignment = crtl->preferred_stack_boundary; > > + > > + HARD_REG_SET stack_slot_access; > > + CLEAR_HARD_REG_SET (stack_slot_access); > > + > > + /* Stack slot can be accessed by stack pointer, frame pointer or > > + registers defined by stack pointer or frame pointer. */ > > + add_to_hard_reg_set (&stack_slot_access, Pmode, > > + STACK_POINTER_REGNUM); > > I'm possibly missing the point, but wasn't the purpose of the change to detect > "indirect" uses of the stack pointer? This now handles two levels, thus > *sp and tem = sp + 4; *tem, but it doesn't seem to handle tem2 = tem + 4; > *tem2? The previous patch did I think, but its compile-time complexity wasn't > sound. > > I think the ix86_find_all_reg_use wants to wrap itself inside sth like > > auto_bitmap worklist; > > bitmap_set_bit (worklist, reg); > do > { > int reg = bitmap_clear_first_set_bit (worklist); > /* it's current processing of reg */ > when it adds to stack_slot_access also add to worklist > } > while (!bitmap_empty_p (worklist)); Will fix. > > + ix86_find_all_reg_use (stack_slot_access, STACK_POINTER_REGNUM); > > + if (frame_pointer_needed) > > + { > > + add_to_hard_reg_set (&stack_slot_access, Pmode, > > + HARD_FRAME_POINTER_REGNUM); > > + ix86_find_all_reg_use (stack_slot_access, > > + HARD_FRAME_POINTER_REGNUM); > > + } > > + > > + for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) > > + if (GENERAL_REGNO_P (i) > > + && TEST_HARD_REG_BIT (stack_slot_access, i)) > > You can as well iterate over the stack_slot_access regset here? Will fix. Thanks. > > + for (df_ref ref = DF_REG_USE_CHAIN (i); > > + ref != NULL; > > + ref = DF_REF_NEXT_REG (ref)) > > + { > > + if (DF_REF_IS_ARTIFICIAL (ref)) > > + continue; > > + > > + rtx_insn *insn = DF_REF_INSN (ref); > > + if (!NONDEBUG_INSN_P (insn)) > > + continue; > > + note_stores (insn, ix86_update_stack_alignment, > > + &stack_alignment); > > + } > > } > > > > /* Finalize stack_realign_needed and frame_pointer_needed flags, which > > diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C > > new file mode 100644 > > index 00000000000..7e3eabdec94 > > --- /dev/null > > +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C > > @@ -0,0 +1,72 @@ > > +/* { dg-do compile } */ > > +/* { dg-require-effective-target c++17 } */ > > +/* { dg-options "-O2 -mavx2 -mtune=haswell" } */ > > + > > +template <typename _Tp> struct remove_reference { > > + using type = __remove_reference(_Tp); > > +}; > > +template <typename T> struct MaybeStorageBase { > > + T val; > > + struct Union { > > + ~Union(); > > + } mStorage; > > +}; > > +template <typename T> struct MaybeStorage : MaybeStorageBase<T> { > > + char mIsSome; > > +}; > > +template <typename T, typename U = typename remove_reference<T>::type> > > +constexpr MaybeStorage<U> Some(T &&); > > +template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) { > > + return {aValue}; > > +} > > +template <class> struct Span { > > + int operator[](long idx) { > > + int *__trans_tmp_4; > > + if (__builtin_expect(idx, 0)) > > + *(int *)__null = false; > > + __trans_tmp_4 = storage_.data(); > > + return __trans_tmp_4[idx]; > > + } > > + struct { > > + int *data() { return data_; } > > + int *data_; > > + } storage_; > > +}; > > +struct Variant { > > + template <typename RefT> Variant(RefT) {} > > +}; > > +long from_i, from___trans_tmp_9; > > +namespace js::intl { > > +struct DecimalNumber { > > + Variant string_; > > + unsigned long significandStart_; > > + unsigned long significandEnd_; > > + bool zero_ = false; > > + bool negative_; > > + template <typename CharT> DecimalNumber(CharT string) : string_(string) {} > > + template <typename CharT> > > + static MaybeStorage<DecimalNumber> from(Span<const CharT>); > > + void from(); > > +}; > > +} // namespace js::intl > > +void js::intl::DecimalNumber::from() { > > + Span<const char16_t> __trans_tmp_3; > > + from(__trans_tmp_3); > > +} > > +template <typename CharT> > > +MaybeStorage<js::intl::DecimalNumber> > > +js::intl::DecimalNumber::from(Span<const CharT> chars) { > > + DecimalNumber number(chars); > > + if (auto ch = chars[from_i]) { > > + from_i++; > > + number.negative_ = ch == '-'; > > + } > > + while (from___trans_tmp_9 && chars[from_i]) > > + ; > > + if (chars[from_i]) > > + while (chars[from_i - 1]) > > + number.zero_ = true; > > + return Some(number); > > +} > > + > > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c > > new file mode 100644 > > index 00000000000..6b06947f2a5 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c > > @@ -0,0 +1,14 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -march=skylake" } */ > > + > > +char perm[64]; > > + > > +void > > +__attribute__((noipa)) > > +foo (int n) > > +{ > > + for (int i = 0; i < n; ++i) > > + perm[i] = i; > > +} > > + > > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c > > new file mode 100644 > > index 00000000000..152da06c6ad > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c > > @@ -0,0 +1,21 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -march=skylake" } */ > > + > > +#define N 9 > > + > > +void > > +f (double x, double y, double *res) > > +{ > > + y = -y; > > + for (int i = 0; i < N; ++i) > > + { > > + double tmp = y; > > + y = x; > > + x = tmp; > > + res[i] = i; > > + } > > + res[N] = y * y; > > + res[N + 1] = x; > > +} > > + > > +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ > > -- > > 2.41.0 > >
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index caca74d6dec..27f349b0ccb 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -8084,6 +8084,63 @@ output_probe_stack_range (rtx reg, rtx end) return ""; } +/* Update the maximum stack slot alignment from memory alignment in + PAT. */ + +static void +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) +{ + /* This insn may reference stack slot. Update the maximum stack slot + alignment. */ + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, pat, ALL) + if (MEM_P (*iter)) + { + unsigned int alignment = MEM_ALIGN (*iter); + unsigned int *stack_alignment + = (unsigned int *) data; + if (alignment > *stack_alignment) + *stack_alignment = alignment; + break; + } +} + +/* Find all registers defined with REG. */ + +static void +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, int reg) +{ + for (df_ref ref = DF_REG_USE_CHAIN (reg); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + if (!NONDEBUG_INSN_P (insn)) + continue; + + rtx set = single_set (insn); + if (!set) + continue; + + rtx src = SET_SRC (set); + if (MEM_P (src)) + continue; + + rtx dest = SET_DEST (set); + if (!REG_P (dest)) + continue; + + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest))) + continue; + + /* Add this register to stack_slot_access. */ + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest)); + } +} + /* Set stack_frame_required to false if stack frame isn't required. Update STACK_ALIGNMENT to the largest alignment, in bits, of stack slot used if stack frame is required and CHECK_STACK_SLOT is true. */ @@ -8102,10 +8159,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, add_to_hard_reg_set (&set_up_by_prologue, Pmode, HARD_FRAME_POINTER_REGNUM); - /* The preferred stack alignment is the minimum stack alignment. */ - if (stack_alignment > crtl->preferred_stack_boundary) - stack_alignment = crtl->preferred_stack_boundary; - bool require_stack_frame = false; FOR_EACH_BB_FN (bb, cfun) @@ -8117,27 +8170,52 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, set_up_by_prologue)) { require_stack_frame = true; - - if (check_stack_slot) - { - /* Find the maximum stack alignment. */ - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) - if (MEM_P (*iter) - && (reg_mentioned_p (stack_pointer_rtx, - *iter) - || reg_mentioned_p (frame_pointer_rtx, - *iter))) - { - unsigned int alignment = MEM_ALIGN (*iter); - if (alignment > stack_alignment) - stack_alignment = alignment; - } - } + break; } } cfun->machine->stack_frame_required = require_stack_frame; + + /* Stop if we don't need to check stack slot. */ + if (!check_stack_slot) + return; + + /* The preferred stack alignment is the minimum stack alignment. */ + if (stack_alignment > crtl->preferred_stack_boundary) + stack_alignment = crtl->preferred_stack_boundary; + + HARD_REG_SET stack_slot_access; + CLEAR_HARD_REG_SET (stack_slot_access); + + /* Stack slot can be accessed by stack pointer, frame pointer or + registers defined by stack pointer or frame pointer. */ + add_to_hard_reg_set (&stack_slot_access, Pmode, + STACK_POINTER_REGNUM); + ix86_find_all_reg_use (stack_slot_access, STACK_POINTER_REGNUM); + if (frame_pointer_needed) + { + add_to_hard_reg_set (&stack_slot_access, Pmode, + HARD_FRAME_POINTER_REGNUM); + ix86_find_all_reg_use (stack_slot_access, + HARD_FRAME_POINTER_REGNUM); + } + + for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (GENERAL_REGNO_P (i) + && TEST_HARD_REG_BIT (stack_slot_access, i)) + for (df_ref ref = DF_REG_USE_CHAIN (i); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + if (!NONDEBUG_INSN_P (insn)) + continue; + note_stores (insn, ix86_update_stack_alignment, + &stack_alignment); + } } /* Finalize stack_realign_needed and frame_pointer_needed flags, which diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C new file mode 100644 index 00000000000..7e3eabdec94 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C @@ -0,0 +1,72 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target c++17 } */ +/* { dg-options "-O2 -mavx2 -mtune=haswell" } */ + +template <typename _Tp> struct remove_reference { + using type = __remove_reference(_Tp); +}; +template <typename T> struct MaybeStorageBase { + T val; + struct Union { + ~Union(); + } mStorage; +}; +template <typename T> struct MaybeStorage : MaybeStorageBase<T> { + char mIsSome; +}; +template <typename T, typename U = typename remove_reference<T>::type> +constexpr MaybeStorage<U> Some(T &&); +template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) { + return {aValue}; +} +template <class> struct Span { + int operator[](long idx) { + int *__trans_tmp_4; + if (__builtin_expect(idx, 0)) + *(int *)__null = false; + __trans_tmp_4 = storage_.data(); + return __trans_tmp_4[idx]; + } + struct { + int *data() { return data_; } + int *data_; + } storage_; +}; +struct Variant { + template <typename RefT> Variant(RefT) {} +}; +long from_i, from___trans_tmp_9; +namespace js::intl { +struct DecimalNumber { + Variant string_; + unsigned long significandStart_; + unsigned long significandEnd_; + bool zero_ = false; + bool negative_; + template <typename CharT> DecimalNumber(CharT string) : string_(string) {} + template <typename CharT> + static MaybeStorage<DecimalNumber> from(Span<const CharT>); + void from(); +}; +} // namespace js::intl +void js::intl::DecimalNumber::from() { + Span<const char16_t> __trans_tmp_3; + from(__trans_tmp_3); +} +template <typename CharT> +MaybeStorage<js::intl::DecimalNumber> +js::intl::DecimalNumber::from(Span<const CharT> chars) { + DecimalNumber number(chars); + if (auto ch = chars[from_i]) { + from_i++; + number.negative_ = ch == '-'; + } + while (from___trans_tmp_9 && chars[from_i]) + ; + if (chars[from_i]) + while (chars[from_i - 1]) + number.zero_ = true; + return Some(number); +} + +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c new file mode 100644 index 00000000000..6b06947f2a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=skylake" } */ + +char perm[64]; + +void +__attribute__((noipa)) +foo (int n) +{ + for (int i = 0; i < n; ++i) + perm[i] = i; +} + +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c new file mode 100644 index 00000000000..152da06c6ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=skylake" } */ + +#define N 9 + +void +f (double x, double y, double *res) +{ + y = -y; + for (int i = 0; i < N; ++i) + { + double tmp = y; + y = x; + x = tmp; + res[i] = i; + } + res[N] = y * y; + res[N + 1] = x; +} + +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */