Message ID | 20160126005828.GA31989@intel.com |
---|---|
State | New |
Headers | show |
On Tue, Jan 26, 2016 at 1:58 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> Stack alignment adjustment for __tls_get_addr should be done in
> ix86_update_stack_boundary, not ix86_compute_frame_layout. Also
> there is no need to over-align stack for __tls_get_addr and function
> with __tls_get_addr call isn't a leaf function.
>
> Tested on x86-64 with -m32 on testsuite. OK for trunk?

OK, but please write the second part without extra parentheses as:

  unsigned int stack_realign = (incoming_stack_boundary
    < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
       ? crtl->max_used_stack_slot_alignment
       : crtl->stack_alignment_needed));

Thanks,
Uros.

>
> Thanks.
>
> H.J.
> ---
> gcc/
>
> PR target/68986
> * config/i386/i386.c (ix86_compute_frame_layout): Move stack
> alignment adjustment to ...
> (ix86_update_stack_boundary): Here. Don't over-align stack for
> __tls_get_addr.
> (ix86_finalize_stack_realign_flags): Use stack_alignment_needed
> if __tls_get_addr is called.
>
> gcc/testsuite/
>
> PR target/68986
> * gcc.target/i386/pr68986-1.c: New test.
> * gcc.target/i386/pr68986-2.c: Likewise.
> * gcc.target/i386/pr68986-3.c: Likewise.
> --- > gcc/config/i386/i386.c | 24 +++++++++++------------- > gcc/testsuite/gcc.target/i386/pr68986-1.c | 11 +++++++++++ > gcc/testsuite/gcc.target/i386/pr68986-2.c | 13 +++++++++++++ > gcc/testsuite/gcc.target/i386/pr68986-3.c | 13 +++++++++++++ > 4 files changed, 48 insertions(+), 13 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr68986-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr68986-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr68986-3.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 34b57a4..9c27ea9 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -11360,18 +11360,6 @@ ix86_compute_frame_layout (struct ix86_frame *frame) > crtl->preferred_stack_boundary = 128; > crtl->stack_alignment_needed = 128; > } > - /* preferred_stack_boundary is never updated for call > - expanded from tls descriptor. Update it here. We don't update it in > - expand stage because according to the comments before > - ix86_current_function_calls_tls_descriptor, tls calls may be optimized > - away. */ > - else if (ix86_current_function_calls_tls_descriptor > - && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY) > - { > - crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; > - if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY) > - crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY; > - } > > stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; > preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; > @@ -12043,6 +12031,15 @@ ix86_update_stack_boundary (void) > && cfun->stdarg > && crtl->stack_alignment_estimated < 128) > crtl->stack_alignment_estimated = 128; > + > + /* __tls_get_addr needs to be called with 16-byte aligned stack. 
*/ > + if (ix86_tls_descriptor_calls_expanded_in_cfun > + && crtl->preferred_stack_boundary < 128) > + { > + crtl->preferred_stack_boundary = 128; > + if (crtl->stack_alignment_needed < 128) > + crtl->stack_alignment_needed = 128; > + } > } > > /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is > @@ -12506,7 +12503,8 @@ ix86_finalize_stack_realign_flags (void) > = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary > ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); > unsigned int stack_realign = (incoming_stack_boundary > - < (crtl->is_leaf > + < ((crtl->is_leaf > + && !ix86_current_function_calls_tls_descriptor) > ? crtl->max_used_stack_slot_alignment > : crtl->stack_alignment_needed)); > > diff --git a/gcc/testsuite/gcc.target/i386/pr68986-1.c b/gcc/testsuite/gcc.target/i386/pr68986-1.c > new file mode 100644 > index 0000000..998f34f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr68986-1.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target tls_native } */ > +/* { dg-require-effective-target fpic } */ > +/* { dg-options "-fPIC -mno-accumulate-outgoing-args -mpreferred-stack-boundary=5 -mincoming-stack-boundary=4" } */ > + > +extern __thread int msgdata; > +int > +foo () > +{ > + return msgdata; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr68986-2.c b/gcc/testsuite/gcc.target/i386/pr68986-2.c > new file mode 100644 > index 0000000..23f9a52 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr68986-2.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile { target ia32 } } */ > +/* { dg-require-effective-target tls_native } */ > +/* { dg-require-effective-target fpic } */ > +/* { dg-options "-fPIC -mno-accumulate-outgoing-args -mpreferred-stack-boundary=2 -m32" } */ > + > +extern __thread int msgdata; > +int > +foo () > +{ > + return msgdata; > +} > + > +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr68986-3.c 
b/gcc/testsuite/gcc.target/i386/pr68986-3.c > new file mode 100644 > index 0000000..5744cf2 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr68986-3.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target tls_native } */ > +/* { dg-require-effective-target fpic } */ > +/* { dg-options "-fPIC -mno-sse -mpreferred-stack-boundary=3 -mincoming-stack-boundary=3" } */ > + > +extern __thread int msgdata; > +int > +foo () > +{ > + return msgdata; > +} > + > +/* { dg-final { scan-assembler "and\[lq\]\[\\t \]*\\$-16,\[\\t \]*%\[re\]?sp" } } */ > -- > 2.5.0 >
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 34b57a4..9c27ea9 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -11360,18 +11360,6 @@ ix86_compute_frame_layout (struct ix86_frame *frame) crtl->preferred_stack_boundary = 128; crtl->stack_alignment_needed = 128; } - /* preferred_stack_boundary is never updated for call - expanded from tls descriptor. Update it here. We don't update it in - expand stage because according to the comments before - ix86_current_function_calls_tls_descriptor, tls calls may be optimized - away. */ - else if (ix86_current_function_calls_tls_descriptor - && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY) - { - crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; - if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY) - crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY; - } stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; @@ -12043,6 +12031,15 @@ ix86_update_stack_boundary (void) && cfun->stdarg && crtl->stack_alignment_estimated < 128) crtl->stack_alignment_estimated = 128; + + /* __tls_get_addr needs to be called with 16-byte aligned stack. */ + if (ix86_tls_descriptor_calls_expanded_in_cfun + && crtl->preferred_stack_boundary < 128) + { + crtl->preferred_stack_boundary = 128; + if (crtl->stack_alignment_needed < 128) + crtl->stack_alignment_needed = 128; + } } /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is @@ -12506,7 +12503,8 @@ ix86_finalize_stack_realign_flags (void) = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); unsigned int stack_realign = (incoming_stack_boundary - < (crtl->is_leaf + < ((crtl->is_leaf + && !ix86_current_function_calls_tls_descriptor) ? 
crtl->max_used_stack_slot_alignment : crtl->stack_alignment_needed)); diff --git a/gcc/testsuite/gcc.target/i386/pr68986-1.c b/gcc/testsuite/gcc.target/i386/pr68986-1.c new file mode 100644 index 0000000..998f34f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr68986-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target tls_native } */ +/* { dg-require-effective-target fpic } */ +/* { dg-options "-fPIC -mno-accumulate-outgoing-args -mpreferred-stack-boundary=5 -mincoming-stack-boundary=4" } */ + +extern __thread int msgdata; +int +foo () +{ + return msgdata; +} diff --git a/gcc/testsuite/gcc.target/i386/pr68986-2.c b/gcc/testsuite/gcc.target/i386/pr68986-2.c new file mode 100644 index 0000000..23f9a52 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr68986-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-require-effective-target tls_native } */ +/* { dg-require-effective-target fpic } */ +/* { dg-options "-fPIC -mno-accumulate-outgoing-args -mpreferred-stack-boundary=2 -m32" } */ + +extern __thread int msgdata; +int +foo () +{ + return msgdata; +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr68986-3.c b/gcc/testsuite/gcc.target/i386/pr68986-3.c new file mode 100644 index 0000000..5744cf2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr68986-3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target tls_native } */ +/* { dg-require-effective-target fpic } */ +/* { dg-options "-fPIC -mno-sse -mpreferred-stack-boundary=3 -mincoming-stack-boundary=3" } */ + +extern __thread int msgdata; +int +foo () +{ + return msgdata; +} + +/* { dg-final { scan-assembler "and\[lq\]\[\\t \]*\\$-16,\[\\t \]*%\[re\]?sp" } } */