Message ID: 87r3nsm32u.fsf@e105548-lin.cambridge.arm.com
State:      New
On Tue, Jul 28, 2015 at 1:36 PM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
> Also tested via config-list.mk.  Committed as preapproved.
>
> Thanks,
> Richard
>
>
> gcc/
> 	* target-insns.def (atomic_test_and_set): New targetm instruction
> 	pattern.
> 	* optabs.c (maybe_emit_atomic_test_and_set): Use it instead of
> 	HAVE_*/gen_* interface.
>
> [full patch quoted; snipped here -- see the patch at the end of this page]
>
> -  if (!HAVE_atomic_test_and_set)
> +  if (!targetm.have_atomic_test_and_set ())
>      return NULL_RTX;

I know this was not there before, but this "if" should be marked as
unlikely: most targets where someone is using __atomic_*/__sync_* will
have these patterns.

Thanks,
Andrew Pinski
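A minimal sketch of the change being suggested here, written with GCC's
usual __builtin_expect idiom; this is an illustration of the suggestion,
not code from the committed patch:

  /* Hypothetical variant of the guard above, hinting that the
     "no pattern" path is cold on targets where these built-ins
     are actually used.  */
  if (__builtin_expect (!targetm.have_atomic_test_and_set (), 0))
    return NULL_RTX;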
Andrew Pinski <pinskia@gmail.com> writes:
> On Tue, Jul 28, 2015 at 1:36 PM, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>> [...]
>> -  if (!HAVE_atomic_test_and_set)
>> +  if (!targetm.have_atomic_test_and_set ())
>>      return NULL_RTX;
>
> I know this was not there before, but this "if" should be marked as
> unlikely: most targets where someone is using __atomic_*/__sync_* will
> have these patterns.

I think that would be premature optimisation.  The path being guarded
here generates new rtl instructions, which is a much more expensive
operation than a mispredicted branch.

Thanks,
Richard
On Tue, Jul 28, 2015 at 3:10 PM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> Andrew Pinski <pinskia@gmail.com> writes:
>> [...]
>> I know this was not there before, but this "if" should be marked as
>> unlikely: most targets where someone is using __atomic_*/__sync_* will
>> have these patterns.
>
> I think that would be premature optimisation.  The path being guarded
> here generates new rtl instructions, which is a much more expensive
> operation than a mispredicted branch.

It might be true that the rest is more expensive, but the common path
goes through there.  It is not just about a mispredicted branch; it is
more about icache misses.

Thanks,
Andrew
Andrew Pinski <pinskia@gmail.com> writes:
> On Tue, Jul 28, 2015 at 3:10 PM, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>> [...]
>> I think that would be premature optimisation.  The path being guarded
>> here generates new rtl instructions, which is a much more expensive
>> operation than a mispredicted branch.
>
> It might be true that the rest is more expensive, but the common path
> goes through there.  It is not just about a mispredicted branch; it is
> more about icache misses.

Do you mean an icache miss from having to fetch a new branch target
after a mispredicted branch?  Or one from calling a targetm function
that isn't already cached?  The latter wouldn't be fixed by marking the
condition as unlikely: we'd still need to call the function.

But calls to maybe_emit_atomic_test_and_set are concentrated in expand.
If the code is hot enough for __builtin_expect or devirtualisation to
be useful, I'd have expected the code to be available in at least L2.

maybe_emit_atomic_test_and_set is only used to expand calls to
__atomic_test_and_set and __sync_lock_test_and_set (the latter only as
a last resort).  Each call to one of those functions will require a
call tree to be built, a gimple statement to be built, trees and gimple
statements for the memory address to be built, expand_expr to be called
on the argument and a MEM to be created for the result, the instruction
pattern rtx to be generated (itself an indirect call), the rtx_insn to
be created and inserted into the rtl stream, the instruction to be
reloaded, and the instruction to be written out as text, even at -O0.
An extra indirect function call or missed __builtin_expect opportunity
per built-in call is going to be in the noise, even if the input code
were dominated by calls to these built-in functions (which seems
unlikely).

Also, I think we usually predict null returns to be unlikely anyway, in
the absence of actual profiling information.  In my build the code
already branches on !have and falls through on have.

Thanks,
Richard
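For readers of the archive: the two built-ins named above are the
user-facing side of this expander.  A minimal, illustrative spin-lock
using the __atomic form (lock_flag and the function names are made up
for the example):

  #include <stdbool.h>

  static bool lock_flag;

  /* Spins until the flag was previously clear; the built-in returns
     the old contents of the flag.  */
  void
  take_lock (void)
  {
    while (__atomic_test_and_set (&lock_flag, __ATOMIC_ACQUIRE))
      ;
  }

  void
  release_lock (void)
  {
    __atomic_clear (&lock_flag, __ATOMIC_RELEASE);
  }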
Index: gcc/target-insns.def
===================================================================
--- gcc/target-insns.def	2015-07-28 21:00:09.815019853 +0100
+++ gcc/target-insns.def	2015-07-28 21:00:09.811019905 +0100
@@ -31,6 +31,7 @@
 
    Instructions should be documented in md.texi rather than here.  */
 DEF_TARGET_INSN (allocate_stack, (rtx x0, rtx x1))
+DEF_TARGET_INSN (atomic_test_and_set, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (builtin_longjmp, (rtx x0))
 DEF_TARGET_INSN (builtin_setjmp_receiver, (rtx x0))
 DEF_TARGET_INSN (builtin_setjmp_setup, (rtx x0))
Index: gcc/optabs.c
===================================================================
--- gcc/optabs.c	2015-07-28 21:00:09.815019853 +0100
+++ gcc/optabs.c	2015-07-28 21:00:09.811019905 +0100
@@ -7258,35 +7258,30 @@ maybe_emit_compare_and_swap_exchange_loo
    using the atomic_test_and_set instruction pattern.  A boolean value
    is returned from the operation, using TARGET if possible.  */
 
-#ifndef HAVE_atomic_test_and_set
-#define HAVE_atomic_test_and_set 0
-#define CODE_FOR_atomic_test_and_set CODE_FOR_nothing
-#endif
-
 static rtx
 maybe_emit_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
 {
   machine_mode pat_bool_mode;
   struct expand_operand ops[3];
 
-  if (!HAVE_atomic_test_and_set)
+  if (!targetm.have_atomic_test_and_set ())
     return NULL_RTX;
 
   /* While we always get QImode from __atomic_test_and_set, we get
      other memory modes from __sync_lock_test_and_set.  Note that we
      use no endian adjustment here.  This matches the 4.6 behavior
      in the Sparc backend.  */
-  gcc_checking_assert
-    (insn_data[CODE_FOR_atomic_test_and_set].operand[1].mode == QImode);
+  enum insn_code icode = targetm.code_for_atomic_test_and_set;
+  gcc_checking_assert (insn_data[icode].operand[1].mode == QImode);
   if (GET_MODE (mem) != QImode)
     mem = adjust_address_nv (mem, QImode, 0);
 
-  pat_bool_mode = insn_data[CODE_FOR_atomic_test_and_set].operand[0].mode;
+  pat_bool_mode = insn_data[icode].operand[0].mode;
   create_output_operand (&ops[0], target, pat_bool_mode);
   create_fixed_operand (&ops[1], mem);
   create_integer_operand (&ops[2], model);
 
-  if (maybe_expand_insn (CODE_FOR_atomic_test_and_set, 3, ops))
+  if (maybe_expand_insn (icode, 3, ops))
    return ops[0].value;
   return NULL_RTX;
 }
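For reference, the DEF_TARGET_INSN entry above provides per-pattern
members in targetm.  The sketch below is simplified and inferred from
how the patch uses them (targetm.have_atomic_test_and_set is called,
targetm.code_for_atomic_test_and_set is read as a field); the real
declarations are generated into a header at build time and may differ
in detail:

  /* Simplified sketch of what
     DEF_TARGET_INSN (atomic_test_and_set, (rtx x0, rtx x1, rtx x2))
     contributes to the targetm structure.  */
  struct gcc_target
  {
    /* ...  */
    /* Does this target define the pattern?  */
    bool (*have_atomic_test_and_set) (void);
    /* Generate an instance of the pattern for the given operands.  */
    rtx_insn *(*gen_atomic_test_and_set) (rtx, rtx, rtx);
    /* The pattern's insn code, or CODE_FOR_nothing if absent.  */
    enum insn_code code_for_atomic_test_and_set;
    /* ...  */
  };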