Message ID | 5666E0E3.6070207@st.com |
---|---|
State | New |
Headers | show |
On 08/12/15 13:53, Christian Bruel wrote: > >> >> The __builtin_neon* aren't published anywhere and people really >> shouldn't be using that directly in source code and only use the >> interface in arm_neon.h which implements pretty much all the Neon >> intrinsics in the ACLE document. >> > > yes, I see. I wanted to reduce the problem as well, not to confuse anything by exposing those. sorry about this. > > Here is the amended patch that use the arm_neon.h interface instead of the builtins. Still fixes the same issues > > Thanks > > Christian > > lto-neon.patch > > 2015-12-07 Christian Bruel <christian.bruel@st.com> > > * config/arm/arm-builtins.c (ARM_BUILTIN_CRYPTO_BASE): New enum tag. > (arm_init_neon_builtins_internal): Rename arm_init_neon_builtins, > (arm_init_crypto_builtins_internal): Rename arm_init_crypto_builtins. > use add_builtin_function_ext_scope instead of add_builtin_function. > (neon_set_p, neon_crypto_set_p): Remove. > (arm_init_builtins): Always call arm_init_neon_builtins and > arm_init_crypto_builtins. > (arm_expand_builtin): Check ARM_BUILTIN_NEON_BASE and > ARM_BUILTIN_CRYPTO_BASE. > * config/arm/arm-protos.h (arm_init_neon_builtins): Remove proto. > * config/arm/arm.c (arm_can_inline_p): Return OK for builtins. > (arm_valid_target_attribute_tree) : Remove arm_init_neon_builtins call. > > 2015-12-07 Christian Bruel <christian.bruel@st.com> > > PR target/pr68784 > PR target/pr65837 > * gcc.target/arm/pr68784.c: New test. > * gcc.target/arm/lto/pr65837_0_attr.c: New test. > * gcc.target/arm/lto/pr65837_0.c: Force float-abi. > > Index: gcc/config/arm/arm-builtins.c > =================================================================== > --- gcc/config/arm/arm-builtins.c (revision 231363) > +++ gcc/config/arm/arm-builtins.c (working copy) > @@ -526,6 +526,8 @@ enum arm_builtins > #define CRYPTO3(L, U, M1, M2, M3, M4) \ > ARM_BUILTIN_CRYPTO_##U, > > + ARM_BUILTIN_CRYPTO_BASE, > + > #include "crypto.def" > > #undef CRYPTO1 > @@ -894,7 +896,7 @@ arm_init_simd_builtin_scalar_types (void > } > > static void > -arm_init_neon_builtins_internal (void) > +arm_init_neon_builtins (void) > { > unsigned int i, fcode = ARM_BUILTIN_NEON_PATTERN_START; > > @@ -1018,7 +1020,7 @@ arm_init_neon_builtins_internal (void) > } > > static void > -arm_init_crypto_builtins_internal (void) > +arm_init_crypto_builtins (void) > { > tree V16UQI_type_node > = arm_simd_builtin_type (V16QImode, true, false); > @@ -1098,25 +1100,6 @@ arm_init_crypto_builtins_internal (void) > #undef FT3 > } > > -static bool neon_set_p = false; > -static bool neon_crypto_set_p = false; > - > -void > -arm_init_neon_builtins (void) > -{ > - if (! neon_set_p) > - { > - neon_set_p = true; > - arm_init_neon_builtins_internal (); > - } > - > - if (! neon_crypto_set_p && TARGET_CRYPTO && TARGET_HARD_FLOAT) > - { > - neon_crypto_set_p = true; > - arm_init_crypto_builtins_internal (); > - } > -} > - > #undef NUM_DREG_TYPES > #undef NUM_QREG_TYPES > > @@ -1777,8 +1760,9 @@ arm_init_builtins (void) > arm_init_neon_builtins which uses it. */ > arm_init_fp16_builtins (); > > - if (TARGET_NEON) > - arm_init_neon_builtins (); > + arm_init_neon_builtins (); > + > + arm_init_crypto_builtins (); > > if (TARGET_CRC32) > arm_init_crc32_builtins (); > @@ -2332,9 +2316,26 @@ arm_expand_builtin (tree exp, > int mask; > int imm; > > + /* Check in the context of the function making the call whether the > + builtin is supported. */ > + if (fcode >= ARM_BUILTIN_NEON_BASE && !TARGET_NEON) > + { > + error ("%qE neon builtin is not supported in this configuration.", > + fndecl); > + return const0_rtx; > + } Can we make this error message more user friendly. "You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use these intrinsics" > + > if (fcode >= ARM_BUILTIN_NEON_BASE) > return arm_expand_neon_builtin (fcode, exp, target); > > + if (fcode >= ARM_BUILTIN_CRYPTO_BASE > + && (!TARGET_CRYPTO || !TARGET_HARD_FLOAT)) > + { > + error ("%qE crypto builtin is not supported in this configuration.", > + fndecl); > + return const0_rtx; > + } "You must enable crypto intrinsics (e.g. -mfloat-abi=softfp -mfpu=crypto-neon...) to use these intrinsics" I'm still playing with this patch. regards Ramana > + > switch (fcode) > { > case ARM_BUILTIN_GET_FPSCR: > Index: gcc/config/arm/arm-protos.h > =================================================================== > --- gcc/config/arm/arm-protos.h (revision 231363) > +++ gcc/config/arm/arm-protos.h (working copy) > @@ -213,7 +213,6 @@ extern void arm_mark_dllimport (tree); > extern bool arm_change_mode_p (tree); > #endif > > -extern void arm_init_neon_builtins (void); > extern tree arm_valid_target_attribute_tree (tree, struct gcc_options *, > struct gcc_options *); > extern void arm_pr_long_calls (struct cpp_reader *); > Index: gcc/config/arm/arm.c > =================================================================== > --- gcc/config/arm/arm.c (revision 231363) > +++ gcc/config/arm/arm.c (working copy) > @@ -26542,16 +26542,10 @@ thumb_set_return_address (rtx source, rt > bool > arm_vector_mode_supported_p (machine_mode mode) > { > - /* Neon also supports V2SImode, etc. listed in the clause below. */ > - if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode > + if (mode == V2SFmode || mode == V4SImode || mode == V8HImode > || mode == V4HFmode || mode == V16QImode || mode == V4SFmode > - || mode == V2DImode || mode == V8HFmode)) > - return true; > - > - if ((TARGET_NEON || TARGET_IWMMXT) > - && ((mode == V2SImode) > - || (mode == V4HImode) > - || (mode == V8QImode))) > + || mode == V2DImode || mode == V8HFmode > + || mode == V2SImode || mode == V4HImode || mode == V8QImode) > return true; > > if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode > @@ -29926,9 +29920,6 @@ arm_valid_target_attribute_tree (tree ar > /* Do any overrides, such as global options arch=xxx. */ > arm_option_override_internal (opts, opts_set); > > - if (TARGET_NEON) > - arm_init_neon_builtins (); > - > return build_target_option_node (opts); > } > > Index: gcc/testsuite/gcc.target/arm/attr-crypto.c > =================================================================== > --- gcc/testsuite/gcc.target/arm/attr-crypto.c (revision 231363) > +++ gcc/testsuite/gcc.target/arm/attr-crypto.c (working copy) > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-require-effective-target arm_crypto_ok } */ > -/* { dg-options "-O2 -mfloat-abi=softfp" } */ > +/* { dg-additional-options "-mfpu=vfp -mfloat-abi=softfp" } */ > > #pragma GCC target ("fpu=crypto-neon-fp-armv8") > > Index: gcc/testsuite/gcc.target/arm/attr_thumb-static.c > =================================================================== > --- gcc/testsuite/gcc.target/arm/attr_thumb-static.c (revision 231363) > +++ gcc/testsuite/gcc.target/arm/attr_thumb-static.c (working copy) > @@ -1,5 +1,6 @@ > /* Check that a change mode to a static function is correctly handled. */ > /* { dg-do run } */ > +/* { dg-skip-if "Need thumb support" { ! { arm_thumb1_ok || arm_thumb2_ok } } { "*" } { "" } } */ > > static void > __attribute__((__noinline__)) > Index: gcc/testsuite/gcc.target/arm/lto/pr65837_0.c > =================================================================== > --- gcc/testsuite/gcc.target/arm/lto/pr65837_0.c (revision 231363) > +++ gcc/testsuite/gcc.target/arm/lto/pr65837_0.c (working copy) > @@ -1,5 +1,7 @@ > /* { dg-lto-do run } */ > -/* { dg-lto-options {{-flto -mfpu=neon}} } */ > +/* { dg-require-effective-target arm_neon_ok } */ > +/* { dg-skip-if "need hardfp ABI" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ > +/* { dg-lto-options {{-flto -mfpu=neon -mfloat-abi=hard}} } */ > /* { dg-suppress-ld-options {-mfpu=neon} } */ > > #include "arm_neon.h" > @@ -8,7 +10,7 @@ float32x2_t a, b, c, e; > > int main() > { > - e = __builtin_neon_vmls_lanev2sf (a, b, c, 0); > + e = vmls_lane_f32 (a, b, c, 0); > return 0; > } > > Index: gcc/testsuite/gcc.target/arm/lto/pr65837_0_attr.c > =================================================================== > --- gcc/testsuite/gcc.target/arm/lto/pr65837_0_attr.c (revision 0) > +++ gcc/testsuite/gcc.target/arm/lto/pr65837_0_attr.c (working copy) > @@ -0,0 +1,16 @@ > +/* { dg-lto-do run } */ > +/* { dg-require-effective-target arm_neon_ok } */ > +/* { dg-skip-if "need hardfp ABI" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ > +/* { dg-lto-options {{-flto -mfloat-abi=hard}} } */ > + > +#include "arm_neon.h" > + > +float32x2_t a, b, c, e; > + > +int __attribute__ ((target("fpu=neon"))) > +main() > +{ > + e = vmls_lane_f32 (a, b, c, 0); > + return 0; > +} > + > Index: gcc/testsuite/gcc.target/arm/pr68784.c > =================================================================== > --- gcc/testsuite/gcc.target/arm/pr68784.c (revision 0) > +++ gcc/testsuite/gcc.target/arm/pr68784.c (working copy) > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_neon_ok } */ > +/* { dg-options "-O2 -mfloat-abi=softfp" } */ > + > +#include "arm_neon.h" > + > +int8x8_t a, b; > +int16x8_t e; > + > +void > +__attribute__ ((target("fpu=neon"))) > +foo(void) > +{ > + e = (int16x8_t) vaddl_s8(a, b); > +} > +
>> + /* Check in the context of the function making the call whether the >> + builtin is supported. */ >> + if (fcode >= ARM_BUILTIN_NEON_BASE && !TARGET_NEON) >> + { >> + error ("%qE neon builtin is not supported in this configuration.", >> + fndecl); >> + return const0_rtx; >> + } > > Can we make this error message more user friendly. > > "You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use these intrinsics" yes, maybe also mention here arm_neon.h since this error is a sanity catch in case of direct __builtin calls. what about something like: "You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) and use the functions from arm_neon.h" ? thanks to the #pragma, calls from arm_neon.h should never result into this error (eventually a target specific option mismatch caught from arm_can_inline_p in case of mismatch with the caller) > >> + >> if (fcode >= ARM_BUILTIN_NEON_BASE) >> return arm_expand_neon_builtin (fcode, exp, target); >> >> + if (fcode >= ARM_BUILTIN_CRYPTO_BASE >> + && (!TARGET_CRYPTO || !TARGET_HARD_FLOAT)) >> + { >> + error ("%qE crypto builtin is not supported in this configuration.", >> + fndecl); >> + return const0_rtx; >> + } > > "You must enable crypto intrinsics (e.g. -mfloat-abi=softfp -mfpu=crypto-neon...) to use these intrinsics" yes, same as above > > I'm still playing with this patch. thanks, Regards Christian > > regards > Ramana > > > > >
2015-12-07 Christian Bruel <christian.bruel@st.com> * config/arm/arm-builtins.c (ARM_BUILTIN_CRYPTO_BASE): New enum tag. (arm_init_neon_builtins_internal): Rename arm_init_neon_builtins, (arm_init_crypto_builtins_internal): Rename arm_init_crypto_builtins. use add_builtin_function_ext_scope instead of add_builtin_function. (neon_set_p, neon_crypto_set_p): Remove. (arm_init_builtins): Always call arm_init_neon_builtins and arm_init_crypto_builtins. (arm_expand_builtin): Check ARM_BUILTIN_NEON_BASE and ARM_BUILTIN_CRYPTO_BASE. * config/arm/arm-protos.h (arm_init_neon_builtins): Remove proto. * config/arm/arm.c (arm_can_inline_p): Return OK for builtins. (arm_valid_target_attribute_tree) : Remove arm_init_neon_builtins call. 2015-12-07 Christian Bruel <christian.bruel@st.com> PR target/pr68784 PR target/pr65837 * gcc.target/arm/pr68784.c: New test. * gcc.target/arm/lto/pr65837_0_attr.c: New test. * gcc.target/arm/lto/pr65837_0.c: Force float-abi. Index: gcc/config/arm/arm-builtins.c =================================================================== --- gcc/config/arm/arm-builtins.c (revision 231363) +++ gcc/config/arm/arm-builtins.c (working copy) @@ -526,6 +526,8 @@ enum arm_builtins #define CRYPTO3(L, U, M1, M2, M3, M4) \ ARM_BUILTIN_CRYPTO_##U, + ARM_BUILTIN_CRYPTO_BASE, + #include "crypto.def" #undef CRYPTO1 @@ -894,7 +896,7 @@ arm_init_simd_builtin_scalar_types (void } static void -arm_init_neon_builtins_internal (void) +arm_init_neon_builtins (void) { unsigned int i, fcode = ARM_BUILTIN_NEON_PATTERN_START; @@ -1018,7 +1020,7 @@ arm_init_neon_builtins_internal (void) } static void -arm_init_crypto_builtins_internal (void) +arm_init_crypto_builtins (void) { tree V16UQI_type_node = arm_simd_builtin_type (V16QImode, true, false); @@ -1098,25 +1100,6 @@ arm_init_crypto_builtins_internal (void) #undef FT3 } -static bool neon_set_p = false; -static bool neon_crypto_set_p = false; - -void -arm_init_neon_builtins (void) -{ - if (! neon_set_p) - { - neon_set_p = true; - arm_init_neon_builtins_internal (); - } - - if (! neon_crypto_set_p && TARGET_CRYPTO && TARGET_HARD_FLOAT) - { - neon_crypto_set_p = true; - arm_init_crypto_builtins_internal (); - } -} - #undef NUM_DREG_TYPES #undef NUM_QREG_TYPES @@ -1777,8 +1760,9 @@ arm_init_builtins (void) arm_init_neon_builtins which uses it. */ arm_init_fp16_builtins (); - if (TARGET_NEON) - arm_init_neon_builtins (); + arm_init_neon_builtins (); + + arm_init_crypto_builtins (); if (TARGET_CRC32) arm_init_crc32_builtins (); @@ -2332,9 +2316,26 @@ arm_expand_builtin (tree exp, int mask; int imm; + /* Check in the context of the function making the call whether the + builtin is supported. */ + if (fcode >= ARM_BUILTIN_NEON_BASE && !TARGET_NEON) + { + error ("%qE neon builtin is not supported in this configuration.", + fndecl); + return const0_rtx; + } + if (fcode >= ARM_BUILTIN_NEON_BASE) return arm_expand_neon_builtin (fcode, exp, target); + if (fcode >= ARM_BUILTIN_CRYPTO_BASE + && (!TARGET_CRYPTO || !TARGET_HARD_FLOAT)) + { + error ("%qE crypto builtin is not supported in this configuration.", + fndecl); + return const0_rtx; + } + switch (fcode) { case ARM_BUILTIN_GET_FPSCR: Index: gcc/config/arm/arm-protos.h =================================================================== --- gcc/config/arm/arm-protos.h (revision 231363) +++ gcc/config/arm/arm-protos.h (working copy) @@ -213,7 +213,6 @@ extern void arm_mark_dllimport (tree); extern bool arm_change_mode_p (tree); #endif -extern void arm_init_neon_builtins (void); extern tree arm_valid_target_attribute_tree (tree, struct gcc_options *, struct gcc_options *); extern void arm_pr_long_calls (struct cpp_reader *); Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c (revision 231363) +++ gcc/config/arm/arm.c (working copy) @@ -26542,16 +26542,10 @@ thumb_set_return_address (rtx source, rt bool arm_vector_mode_supported_p (machine_mode mode) { - /* Neon also supports V2SImode, etc. listed in the clause below. */ - if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode + if (mode == V2SFmode || mode == V4SImode || mode == V8HImode || mode == V4HFmode || mode == V16QImode || mode == V4SFmode - || mode == V2DImode || mode == V8HFmode)) - return true; - - if ((TARGET_NEON || TARGET_IWMMXT) - && ((mode == V2SImode) - || (mode == V4HImode) - || (mode == V8QImode))) + || mode == V2DImode || mode == V8HFmode + || mode == V2SImode || mode == V4HImode || mode == V8QImode) return true; if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode @@ -29926,9 +29920,6 @@ arm_valid_target_attribute_tree (tree ar /* Do any overrides, such as global options arch=xxx. */ arm_option_override_internal (opts, opts_set); - if (TARGET_NEON) - arm_init_neon_builtins (); - return build_target_option_node (opts); } Index: gcc/testsuite/gcc.target/arm/attr-crypto.c =================================================================== --- gcc/testsuite/gcc.target/arm/attr-crypto.c (revision 231363) +++ gcc/testsuite/gcc.target/arm/attr-crypto.c (working copy) @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target arm_crypto_ok } */ -/* { dg-options "-O2 -mfloat-abi=softfp" } */ +/* { dg-additional-options "-mfpu=vfp -mfloat-abi=softfp" } */ #pragma GCC target ("fpu=crypto-neon-fp-armv8") Index: gcc/testsuite/gcc.target/arm/attr_thumb-static.c =================================================================== --- gcc/testsuite/gcc.target/arm/attr_thumb-static.c (revision 231363) +++ gcc/testsuite/gcc.target/arm/attr_thumb-static.c (working copy) @@ -1,5 +1,6 @@ /* Check that a change mode to a static function is correctly handled. */ /* { dg-do run } */ +/* { dg-skip-if "Need thumb support" { ! { arm_thumb1_ok || arm_thumb2_ok } } { "*" } { "" } } */ static void __attribute__((__noinline__)) Index: gcc/testsuite/gcc.target/arm/lto/pr65837_0.c =================================================================== --- gcc/testsuite/gcc.target/arm/lto/pr65837_0.c (revision 231363) +++ gcc/testsuite/gcc.target/arm/lto/pr65837_0.c (working copy) @@ -1,5 +1,7 @@ /* { dg-lto-do run } */ -/* { dg-lto-options {{-flto -mfpu=neon}} } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-skip-if "need hardfp ABI" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ +/* { dg-lto-options {{-flto -mfpu=neon -mfloat-abi=hard}} } */ /* { dg-suppress-ld-options {-mfpu=neon} } */ #include "arm_neon.h" @@ -8,7 +10,7 @@ float32x2_t a, b, c, e; int main() { - e = __builtin_neon_vmls_lanev2sf (a, b, c, 0); + e = vmls_lane_f32 (a, b, c, 0); return 0; } Index: gcc/testsuite/gcc.target/arm/lto/pr65837_0_attr.c =================================================================== --- gcc/testsuite/gcc.target/arm/lto/pr65837_0_attr.c (revision 0) +++ gcc/testsuite/gcc.target/arm/lto/pr65837_0_attr.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-lto-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-skip-if "need hardfp ABI" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ +/* { dg-lto-options {{-flto -mfloat-abi=hard}} } */ + +#include "arm_neon.h" + +float32x2_t a, b, c, e; + +int __attribute__ ((target("fpu=neon"))) +main() +{ + e = vmls_lane_f32 (a, b, c, 0); + return 0; +} + Index: gcc/testsuite/gcc.target/arm/pr68784.c =================================================================== --- gcc/testsuite/gcc.target/arm/pr68784.c (revision 0) +++ gcc/testsuite/gcc.target/arm/pr68784.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2 -mfloat-abi=softfp" } */ + +#include "arm_neon.h" + +int8x8_t a, b; +int16x8_t e; + +void +__attribute__ ((target("fpu=neon"))) +foo(void) +{ + e = (int16x8_t) vaddl_s8(a, b); +} +