Message ID | CA+4CFy5BdB2=mnd=o+8GmQ5EBtJfGUFB-0AKFou2xMHvekCAJw@mail.gmail.com |
---|---|
State | New |
Headers | show |
Regression test is ok. Thanks, Wei. On Fri, Mar 7, 2014 at 1:26 PM, Wei Mi <wmi@google.com> wrote: > Hi, > > This patch is to fix the problem described here: > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 > > I follow Ian's suggestion and set > ix86_tls_descriptor_calls_expanded_in_cfun in > tls_global_dynamic_64_<mode> and tls_local_dynamic_base_64_<mode>. > Although 32bit doesn't have the problem, > ix86_tls_descriptor_calls_expanded_in_cfun is also set for > tls_global_dynamic_32 and tls_local_dynamic_base_32 to make > ix86_tls_descriptor_calls_expanded_in_cfun setting consistent across > 32bits and 64bits. > > If ix86_current_function_calls_tls_descriptor is set, we know that > there is tls expanded call in current function. Update > crtl->preferred_stack_boundary and crtl->stack_alignment_needed to be > no less than PREFERED_STACK_ALIGNMENT at the start of > ix86_compute_frame_layout. We don't do the update in > legitimize_tls_address in cfgexpand stage, which is too early because > according to the comments before > ix86_current_function_calls_tls_descriptor, tls call may be optimized > away. ix86_compute_frame_layout is the latest place to do the update. > > bootstrap on x86_64-linux-gnu is ok. regression test is going on. Ok > for trunk if tests pass? > > Thanks, > Wei. > > gcc/ChangeLog: > > 2014-03-07 Wei Mi <wmi@google.com> > > * config/i386/i386.c (ix86_compute_frame_layout): Update > preferred_stack_boundary when there is tls expanded call. > * config/i386/i386.md: Set > ix86_tls_descriptor_calls_expanded_in_cfun. > > gcc/testsuite/ChangeLog: > > 2014-03-07 Wei Mi <wmi@google.com> > > * g++.dg/pr58066.C: New test. > > > Index: gcc/config/i386/i386.c > =================================================================== > --- gcc/config/i386/i386.c (revision 208410) > +++ gcc/config/i386/i386.c (working copy) > @@ -9504,6 +9504,19 @@ ix86_compute_frame_layout (struct ix86_f > crtl->preferred_stack_boundary = 128; > crtl->stack_alignment_needed = 128; > } > + /* For 64-bit target, preferred_stack_boundary is never updated for call > + expanded from tls descriptor. Update it here. We don't update it in > + expand stage because according to the comments before > + ix86_current_function_calls_tls_descriptor, tls calls may be optimized > + away. */ > + else if (TARGET_64BIT > + && ix86_current_function_calls_tls_descriptor > + && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY) > + { > + crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; > + if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY) > + crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY; > + } > > gcc_assert (!size || stack_alignment_needed); > gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); > Index: gcc/config/i386/i386.md > =================================================================== > --- gcc/config/i386/i386.md (revision 208410) > +++ gcc/config/i386/i386.md (working copy) > @@ -12891,7 +12891,11 @@ > UNSPEC_TLS_GD)) > (clobber (match_scratch:SI 4)) > (clobber (match_scratch:SI 5)) > - (clobber (reg:CC FLAGS_REG))])]) > + (clobber (reg:CC FLAGS_REG))])] > + "" > +{ > + ix86_tls_descriptor_calls_expanded_in_cfun = true; > +}) > > (define_insn "*tls_global_dynamic_64_<mode>" > [(set (match_operand:P 0 "register_operand" "=a") > @@ -12946,7 +12950,10 @@ > (const_int 0))) > (unspec:P [(match_operand 1 "tls_symbolic_operand")] > UNSPEC_TLS_GD)])] > - "TARGET_64BIT") > + "TARGET_64BIT" > +{ > + ix86_tls_descriptor_calls_expanded_in_cfun = true; > +}) > > (define_insn "*tls_local_dynamic_base_32_gnu" > [(set (match_operand:SI 0 "register_operand" "=a") > @@ -12982,7 +12989,11 @@ > UNSPEC_TLS_LD_BASE)) > (clobber (match_scratch:SI 3)) > (clobber (match_scratch:SI 4)) > - (clobber (reg:CC FLAGS_REG))])]) > + (clobber (reg:CC FLAGS_REG))])] > + "" > +{ > + ix86_tls_descriptor_calls_expanded_in_cfun = true; > +}) > > (define_insn "*tls_local_dynamic_base_64_<mode>" > [(set (match_operand:P 0 "register_operand" "=a") > @@ -13029,7 +13040,10 @@ > (mem:QI (match_operand 1)) > (const_int 0))) > (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])] > - "TARGET_64BIT") > + "TARGET_64BIT" > +{ > + ix86_tls_descriptor_calls_expanded_in_cfun = true; > +}) > > ;; Local dynamic of a single variable is a lose. Show combine how > ;; to convert that back to global dynamic. > Index: gcc/testsuite/g++.dg/pr58066.C > =================================================================== > --- gcc/testsuite/g++.dg/pr58066.C (revision 0) > +++ gcc/testsuite/g++.dg/pr58066.C (revision 0) > @@ -0,0 +1,12 @@ > +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ > +/* { dg-options "-fPIC -O2 -m64" } */ > + > +/* Check whether the stack frame starting address of tls expanded call > + in __cxa_get_globals() is 16bytes aligned. */ > +static __thread char ccc; > +extern "C" void* __cxa_get_globals() throw() > +{ > + return &ccc; > +} > + > +/* { dg-final { scan-assembler ".cfi_def_cfa_offset 16" } } */
On Fri, Mar 7, 2014 at 1:26 PM, Wei Mi <wmi@google.com> wrote: > Hi, > > This patch is to fix the problem described here: > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 > > I follow Ian's suggestion and set > ix86_tls_descriptor_calls_expanded_in_cfun in > tls_global_dynamic_64_<mode> and tls_local_dynamic_base_64_<mode>. > Although 32bit doesn't have the problem, > ix86_tls_descriptor_calls_expanded_in_cfun is also set for > tls_global_dynamic_32 and tls_local_dynamic_base_32 to make > ix86_tls_descriptor_calls_expanded_in_cfun setting consistent across > 32bits and 64bits. > > If ix86_current_function_calls_tls_descriptor is set, we know that > there is tls expanded call in current function. Update > crtl->preferred_stack_boundary and crtl->stack_alignment_needed to be > no less than PREFERED_STACK_ALIGNMENT at the start of > ix86_compute_frame_layout. We don't do the update in > legitimize_tls_address in cfgexpand stage, which is too early because > according to the comments before > ix86_current_function_calls_tls_descriptor, tls call may be optimized > away. ix86_compute_frame_layout is the latest place to do the update. > > bootstrap on x86_64-linux-gnu is ok. regression test is going on. Ok > for trunk if tests pass? > > Thanks, > Wei. > > gcc/ChangeLog: > > 2014-03-07 Wei Mi <wmi@google.com> > > * config/i386/i386.c (ix86_compute_frame_layout): Update > preferred_stack_boundary when there is tls expanded call. > * config/i386/i386.md: Set > ix86_tls_descriptor_calls_expanded_in_cfun. > > gcc/testsuite/ChangeLog: > > 2014-03-07 Wei Mi <wmi@google.com> > > * g++.dg/pr58066.C: New test. > > > Index: gcc/config/i386/i386.c > =================================================================== > --- gcc/config/i386/i386.c (revision 208410) > +++ gcc/config/i386/i386.c (working copy) > @@ -9504,6 +9504,19 @@ ix86_compute_frame_layout (struct ix86_f > crtl->preferred_stack_boundary = 128; > crtl->stack_alignment_needed = 128; > } > + /* For 64-bit target, preferred_stack_boundary is never updated for call > + expanded from tls descriptor. Update it here. We don't update it in > + expand stage because according to the comments before > + ix86_current_function_calls_tls_descriptor, tls calls may be optimized > + away. */ > + else if (TARGET_64BIT > + && ix86_current_function_calls_tls_descriptor > + && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY) > + { > + crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; > + if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY) > + crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY; > + } > > gcc_assert (!size || stack_alignment_needed); > gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); > Index: gcc/config/i386/i386.md > =================================================================== > --- gcc/config/i386/i386.md (revision 208410) > +++ gcc/config/i386/i386.md (working copy) > @@ -12891,7 +12891,11 @@ > UNSPEC_TLS_GD)) > (clobber (match_scratch:SI 4)) > (clobber (match_scratch:SI 5)) > - (clobber (reg:CC FLAGS_REG))])]) > + (clobber (reg:CC FLAGS_REG))])] > + "" > +{ > + ix86_tls_descriptor_calls_expanded_in_cfun = true; > +}) > > (define_insn "*tls_global_dynamic_64_<mode>" > [(set (match_operand:P 0 "register_operand" "=a") > @@ -12946,7 +12950,10 @@ > (const_int 0))) > (unspec:P [(match_operand 1 "tls_symbolic_operand")] > UNSPEC_TLS_GD)])] > - "TARGET_64BIT") > + "TARGET_64BIT" > +{ > + ix86_tls_descriptor_calls_expanded_in_cfun = true; > +}) > > (define_insn "*tls_local_dynamic_base_32_gnu" > [(set (match_operand:SI 0 "register_operand" "=a") > @@ -12982,7 +12989,11 @@ > UNSPEC_TLS_LD_BASE)) > (clobber (match_scratch:SI 3)) > (clobber (match_scratch:SI 4)) > - (clobber (reg:CC FLAGS_REG))])]) > + (clobber (reg:CC FLAGS_REG))])] > + "" > +{ > + ix86_tls_descriptor_calls_expanded_in_cfun = true; > +}) > > (define_insn "*tls_local_dynamic_base_64_<mode>" > [(set (match_operand:P 0 "register_operand" "=a") > @@ -13029,7 +13040,10 @@ > (mem:QI (match_operand 1)) > (const_int 0))) > (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])] > - "TARGET_64BIT") > + "TARGET_64BIT" > +{ > + ix86_tls_descriptor_calls_expanded_in_cfun = true; > +}) > > ;; Local dynamic of a single variable is a lose. Show combine how > ;; to convert that back to global dynamic. > Index: gcc/testsuite/g++.dg/pr58066.C > =================================================================== > --- gcc/testsuite/g++.dg/pr58066.C (revision 0) > +++ gcc/testsuite/g++.dg/pr58066.C (revision 0) > @@ -0,0 +1,12 @@ > +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ ^^^^^ It should be not ia32. since we also want to test it for x32. > +/* { dg-options "-fPIC -O2 -m64" } */ ^^^^^^^^ No need to add -m64. > + > +/* Check whether the stack frame starting address of tls expanded call > + in __cxa_get_globals() is 16bytes aligned. */ > +static __thread char ccc; > +extern "C" void* __cxa_get_globals() throw() > +{ > + return &ccc; > +} > + > +/* { dg-final { scan-assembler ".cfi_def_cfa_offset 16" } } */
Yes, x32 has the same problem. It should be tested. Fixed. Thanks, Wei. On Fri, Mar 7, 2014 at 2:06 PM, H.J. Lu <hjl.tools@gmail.com> wrote: > On Fri, Mar 7, 2014 at 1:26 PM, Wei Mi <wmi@google.com> wrote: >> Hi, >> >> This patch is to fix the problem described here: >> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 >> >> I follow Ian's suggestion and set >> ix86_tls_descriptor_calls_expanded_in_cfun in >> tls_global_dynamic_64_<mode> and tls_local_dynamic_base_64_<mode>. >> Although 32bit doesn't have the problem, >> ix86_tls_descriptor_calls_expanded_in_cfun is also set for >> tls_global_dynamic_32 and tls_local_dynamic_base_32 to make >> ix86_tls_descriptor_calls_expanded_in_cfun setting consistent across >> 32bits and 64bits. >> >> If ix86_current_function_calls_tls_descriptor is set, we know that >> there is tls expanded call in current function. Update >> crtl->preferred_stack_boundary and crtl->stack_alignment_needed to be >> no less than PREFERED_STACK_ALIGNMENT at the start of >> ix86_compute_frame_layout. We don't do the update in >> legitimize_tls_address in cfgexpand stage, which is too early because >> according to the comments before >> ix86_current_function_calls_tls_descriptor, tls call may be optimized >> away. ix86_compute_frame_layout is the latest place to do the update. >> >> bootstrap on x86_64-linux-gnu is ok. regression test is going on. Ok >> for trunk if tests pass? >> >> Thanks, >> Wei. >> >> gcc/ChangeLog: >> >> 2014-03-07 Wei Mi <wmi@google.com> >> >> * config/i386/i386.c (ix86_compute_frame_layout): Update >> preferred_stack_boundary when there is tls expanded call. >> * config/i386/i386.md: Set >> ix86_tls_descriptor_calls_expanded_in_cfun. >> >> gcc/testsuite/ChangeLog: >> >> 2014-03-07 Wei Mi <wmi@google.com> >> >> * g++.dg/pr58066.C: New test. >> >> >> Index: gcc/config/i386/i386.c >> =================================================================== >> --- gcc/config/i386/i386.c (revision 208410) >> +++ gcc/config/i386/i386.c (working copy) >> @@ -9504,6 +9504,19 @@ ix86_compute_frame_layout (struct ix86_f >> crtl->preferred_stack_boundary = 128; >> crtl->stack_alignment_needed = 128; >> } >> + /* For 64-bit target, preferred_stack_boundary is never updated for call >> + expanded from tls descriptor. Update it here. We don't update it in >> + expand stage because according to the comments before >> + ix86_current_function_calls_tls_descriptor, tls calls may be optimized >> + away. */ >> + else if (TARGET_64BIT >> + && ix86_current_function_calls_tls_descriptor >> + && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY) >> + { >> + crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; >> + if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY) >> + crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY; >> + } >> >> gcc_assert (!size || stack_alignment_needed); >> gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); >> Index: gcc/config/i386/i386.md >> =================================================================== >> --- gcc/config/i386/i386.md (revision 208410) >> +++ gcc/config/i386/i386.md (working copy) >> @@ -12891,7 +12891,11 @@ >> UNSPEC_TLS_GD)) >> (clobber (match_scratch:SI 4)) >> (clobber (match_scratch:SI 5)) >> - (clobber (reg:CC FLAGS_REG))])]) >> + (clobber (reg:CC FLAGS_REG))])] >> + "" >> +{ >> + ix86_tls_descriptor_calls_expanded_in_cfun = true; >> +}) >> >> (define_insn "*tls_global_dynamic_64_<mode>" >> [(set (match_operand:P 0 "register_operand" "=a") >> @@ -12946,7 +12950,10 @@ >> (const_int 0))) >> (unspec:P [(match_operand 1 "tls_symbolic_operand")] >> UNSPEC_TLS_GD)])] >> - "TARGET_64BIT") >> + "TARGET_64BIT" >> +{ >> + ix86_tls_descriptor_calls_expanded_in_cfun = true; >> +}) >> >> (define_insn "*tls_local_dynamic_base_32_gnu" >> [(set (match_operand:SI 0 "register_operand" "=a") >> @@ -12982,7 +12989,11 @@ >> UNSPEC_TLS_LD_BASE)) >> (clobber (match_scratch:SI 3)) >> (clobber (match_scratch:SI 4)) >> - (clobber (reg:CC FLAGS_REG))])]) >> + (clobber (reg:CC FLAGS_REG))])] >> + "" >> +{ >> + ix86_tls_descriptor_calls_expanded_in_cfun = true; >> +}) >> >> (define_insn "*tls_local_dynamic_base_64_<mode>" >> [(set (match_operand:P 0 "register_operand" "=a") >> @@ -13029,7 +13040,10 @@ >> (mem:QI (match_operand 1)) >> (const_int 0))) >> (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])] >> - "TARGET_64BIT") >> + "TARGET_64BIT" >> +{ >> + ix86_tls_descriptor_calls_expanded_in_cfun = true; >> +}) >> >> ;; Local dynamic of a single variable is a lose. Show combine how >> ;; to convert that back to global dynamic. >> Index: gcc/testsuite/g++.dg/pr58066.C >> =================================================================== >> --- gcc/testsuite/g++.dg/pr58066.C (revision 0) >> +++ gcc/testsuite/g++.dg/pr58066.C (revision 0) >> @@ -0,0 +1,12 @@ >> +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ > > ^^^^^ It should be not ia32. > since we also want to test it for x32. > >> +/* { dg-options "-fPIC -O2 -m64" } */ > ^^^^^^^^ No need to add -m64. >> + >> +/* Check whether the stack frame starting address of tls expanded call >> + in __cxa_get_globals() is 16bytes aligned. */ >> +static __thread char ccc; >> +extern "C" void* __cxa_get_globals() throw() >> +{ >> + return &ccc; >> +} >> + >> +/* { dg-final { scan-assembler ".cfi_def_cfa_offset 16" } } */ > > > > -- > H.J.
On Fri, Mar 7, 2014 at 2:33 PM, Wei Mi <wmi@google.com> wrote: > Yes, x32 has the same problem. It should be tested. Fixed. > > Thanks, > Wei. > > > On Fri, Mar 7, 2014 at 2:06 PM, H.J. Lu <hjl.tools@gmail.com> wrote: >> On Fri, Mar 7, 2014 at 1:26 PM, Wei Mi <wmi@google.com> wrote: >>> Hi, >>> >>> This patch is to fix the problem described here: >>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 >>> >>> I follow Ian's suggestion and set >>> ix86_tls_descriptor_calls_expanded_in_cfun in >>> tls_global_dynamic_64_<mode> and tls_local_dynamic_base_64_<mode>. >>> Although 32bit doesn't have the problem, >>> ix86_tls_descriptor_calls_expanded_in_cfun is also set for >>> tls_global_dynamic_32 and tls_local_dynamic_base_32 to make >>> ix86_tls_descriptor_calls_expanded_in_cfun setting consistent across >>> 32bits and 64bits. >>> >>> If ix86_current_function_calls_tls_descriptor is set, we know that >>> there is tls expanded call in current function. Update >>> crtl->preferred_stack_boundary and crtl->stack_alignment_needed to be >>> no less than PREFERED_STACK_ALIGNMENT at the start of >>> ix86_compute_frame_layout. We don't do the update in >>> legitimize_tls_address in cfgexpand stage, which is too early because >>> according to the comments before >>> ix86_current_function_calls_tls_descriptor, tls call may be optimized >>> away. ix86_compute_frame_layout is the latest place to do the update. >>> >>> bootstrap on x86_64-linux-gnu is ok. regression test is going on. Ok >>> for trunk if tests pass? >>> >>> Thanks, >>> Wei. >>> >>> gcc/ChangeLog: >>> >>> 2014-03-07 Wei Mi <wmi@google.com> >>> >>> * config/i386/i386.c (ix86_compute_frame_layout): Update >>> preferred_stack_boundary when there is tls expanded call. >>> * config/i386/i386.md: Set >>> ix86_tls_descriptor_calls_expanded_in_cfun. >>> >>> gcc/testsuite/ChangeLog: >>> >>> 2014-03-07 Wei Mi <wmi@google.com> >>> >>> * g++.dg/pr58066.C: New test. >>> >>> >>> Index: gcc/config/i386/i386.c >>> =================================================================== >>> --- gcc/config/i386/i386.c (revision 208410) >>> +++ gcc/config/i386/i386.c (working copy) >>> @@ -9504,6 +9504,19 @@ ix86_compute_frame_layout (struct ix86_f >>> crtl->preferred_stack_boundary = 128; >>> crtl->stack_alignment_needed = 128; >>> } >>> + /* For 64-bit target, preferred_stack_boundary is never updated for call >>> + expanded from tls descriptor. Update it here. We don't update it in >>> + expand stage because according to the comments before >>> + ix86_current_function_calls_tls_descriptor, tls calls may be optimized >>> + away. */ >>> + else if (TARGET_64BIT >>> + && ix86_current_function_calls_tls_descriptor >>> + && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY) >>> + { >>> + crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; >>> + if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY) >>> + crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY; >>> + } >>> There are several problems with this: 1. It doesn't work with C. 2. IA32 has the same issue and isn't fixed. 3. There is no testcase for global dynamic model.
> There are several problems with this: > > 1. It doesn't work with C. Ok, I will change the testcase using C. > 2. IA32 has the same issue and isn't fixed. I thought IA32 didn't have the same issue because abi only requires 32 bit alignment for stack starting address. oh, I found the old patch http://gcc.gnu.org/ml/gcc-patches/2006-09/msg00298.html which changed the default alignment to 128bit. Ok, will remove the TARGET_64BIT constraint. > 3. There is no testcase for global dynamic model. > > -- > H.J. Will add the testcase. Thanks, Wei.
On Wed, Mar 12, 2014 at 2:03 PM, Wei Mi <wmi@google.com> wrote: >> There are several problems with this: >> >> 1. It doesn't work with C. > > Ok, I will change the testcase using C. > >> 2. IA32 has the same issue and isn't fixed. > > I thought IA32 didn't have the same issue because abi only requires 32 > bit alignment for stack starting address. > > oh, I found the old patch > http://gcc.gnu.org/ml/gcc-patches/2006-09/msg00298.html which changed > the default alignment to 128bit. Ok, will remove the TARGET_64BIT > constraint. > >> 3. There is no testcase for global dynamic model. >> >> -- >> H.J. > > Will add the testcase. > I posted a different patch in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
Hi H.J., Could you show me why you postpone the setting ix86_tls_descriptor_calls_expanded_in_cfun until reload_complete and use ix86_tls_descriptor_calls_expanded_in_cfun instead of ix86_current_function_calls_tls_descriptor? Isn't ix86_current_function_calls_tls_descriptor useful to consider the case that tls call is optimized away? Thanks, Wei. On Wed, Mar 12, 2014 at 2:07 PM, H.J. Lu <hjl.tools@gmail.com> wrote: > On Wed, Mar 12, 2014 at 2:03 PM, Wei Mi <wmi@google.com> wrote: >>> There are several problems with this: >>> >>> 1. It doesn't work with C. >> >> Ok, I will change the testcase using C. >> >>> 2. IA32 has the same issue and isn't fixed. >> >> I thought IA32 didn't have the same issue because abi only requires 32 >> bit alignment for stack starting address. >> >> oh, I found the old patch >> http://gcc.gnu.org/ml/gcc-patches/2006-09/msg00298.html which changed >> the default alignment to 128bit. Ok, will remove the TARGET_64BIT >> constraint. >> >>> 3. There is no testcase for global dynamic model. >>> >>> -- >>> H.J. >> >> Will add the testcase. >> > > I posted a different patch in > > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 > > -- > H.J.
On Wed, Mar 12, 2014 at 2:36 PM, Wei Mi <wmi@google.com> wrote: > Hi H.J., > > Could you show me why you postpone the setting > ix86_tls_descriptor_calls_expanded_in_cfun until reload_complete and > use ix86_tls_descriptor_calls_expanded_in_cfun instead of > ix86_current_function_calls_tls_descriptor? Isn't > ix86_current_function_calls_tls_descriptor useful to consider the case > that tls call is optimized away? > When a tls call is optimized away, it won't survive reload. If it does survive reload, it isn't optimized away. Also checking df_regs_ever_live_p (SP_REG) isn't reliable when called from ix86_compute_frame_layout.
Oh, I see. Thanks! Wei. On Wed, Mar 12, 2014 at 2:42 PM, H.J. Lu <hjl.tools@gmail.com> wrote: > On Wed, Mar 12, 2014 at 2:36 PM, Wei Mi <wmi@google.com> wrote: >> Hi H.J., >> >> Could you show me why you postpone the setting >> ix86_tls_descriptor_calls_expanded_in_cfun until reload_complete and >> use ix86_tls_descriptor_calls_expanded_in_cfun instead of >> ix86_current_function_calls_tls_descriptor? Isn't >> ix86_current_function_calls_tls_descriptor useful to consider the case >> that tls call is optimized away? >> > > When a tls call is optimized away, it won't survive reload. > If it does survive reload, it isn't optimized away. Also > checking df_regs_ever_live_p (SP_REG) isn't reliable > when called from ix86_compute_frame_layout. > > -- > H.J.
Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 208410) +++ gcc/config/i386/i386.c (working copy) @@ -9504,6 +9504,19 @@ ix86_compute_frame_layout (struct ix86_f crtl->preferred_stack_boundary = 128; crtl->stack_alignment_needed = 128; } + /* For 64-bit target, preferred_stack_boundary is never updated for call + expanded from tls descriptor. Update it here. We don't update it in + expand stage because according to the comments before + ix86_current_function_calls_tls_descriptor, tls calls may be optimized + away. */ + else if (TARGET_64BIT + && ix86_current_function_calls_tls_descriptor + && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY) + { + crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; + if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY) + crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY; + } gcc_assert (!size || stack_alignment_needed); gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); Index: gcc/config/i386/i386.md =================================================================== --- gcc/config/i386/i386.md (revision 208410) +++ gcc/config/i386/i386.md (working copy) @@ -12891,7 +12891,11 @@ UNSPEC_TLS_GD)) (clobber (match_scratch:SI 4)) (clobber (match_scratch:SI 5)) - (clobber (reg:CC FLAGS_REG))])]) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) (define_insn "*tls_global_dynamic_64_<mode>" [(set (match_operand:P 0 "register_operand" "=a") @@ -12946,7 +12950,10 @@ (const_int 0))) (unspec:P [(match_operand 1 "tls_symbolic_operand")] UNSPEC_TLS_GD)])] - "TARGET_64BIT") + "TARGET_64BIT" +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) (define_insn "*tls_local_dynamic_base_32_gnu" [(set (match_operand:SI 0 "register_operand" "=a") @@ -12982,7 +12989,11 @@ UNSPEC_TLS_LD_BASE)) (clobber (match_scratch:SI 3)) (clobber (match_scratch:SI 4)) - (clobber (reg:CC FLAGS_REG))])]) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) (define_insn "*tls_local_dynamic_base_64_<mode>" [(set (match_operand:P 0 "register_operand" "=a") @@ -13029,7 +13040,10 @@ (mem:QI (match_operand 1)) (const_int 0))) (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])] - "TARGET_64BIT") + "TARGET_64BIT" +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) ;; Local dynamic of a single variable is a lose. Show combine how ;; to convert that back to global dynamic. Index: gcc/testsuite/g++.dg/pr58066.C =================================================================== --- gcc/testsuite/g++.dg/pr58066.C (revision 0) +++ gcc/testsuite/g++.dg/pr58066.C (revision 0) @@ -0,0 +1,12 @@ +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-fPIC -O2 -m64" } */ + +/* Check whether the stack frame starting address of tls expanded call + in __cxa_get_globals() is 16bytes aligned. */ +static __thread char ccc; +extern "C" void* __cxa_get_globals() throw() +{ + return &ccc; +} + +/* { dg-final { scan-assembler ".cfi_def_cfa_offset 16" } } */