commit 07191e8bbcd3ecbd14d19f0a4296249ba6c2770f
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed May 20 12:02:33 2015 +0100
[AArch64][11/N] Re-layout SIMD builtin types on builtin expansion
@@ -555,7 +555,7 @@ aarch64_simd_builtin_type (enum machine_mode mode,
else
return aarch64_lookup_simd_builtin_type (mode, qualifier_none);
}
-
+
static void
aarch64_init_simd_builtin_types (void)
{
@@ -679,11 +679,18 @@ aarch64_init_simd_builtin_scalar_types (void)
"__builtin_aarch64_simd_udi");
}
-static void
+static bool simd_builtins_inited_p = false;
+
+void
aarch64_init_simd_builtins (void)
{
unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
+ if (simd_builtins_inited_p)
+ return;
+
+ simd_builtins_inited_p = true;
+
aarch64_init_simd_builtin_types ();
/* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
@@ -846,8 +853,8 @@ aarch64_init_builtins (void)
if (TARGET_SIMD)
aarch64_init_simd_builtins ();
- if (TARGET_CRC32)
- aarch64_init_crc32_builtins ();
+
+ aarch64_init_crc32_builtins ();
}
tree
@@ -867,6 +874,31 @@ typedef enum
SIMD_ARG_STOP
} builtin_simd_arg;
+/* Relayout the decl of a function arg. Keep the RTL component the same,
+ as varasm.c ICEs at varasm.c:1324. It doesn't like reinitializing the RTL
+ on PARM decls. Something like this needs to be done when compiling a
+ file without SIMD and then tagging a function with +simd and using SIMD
+ intrinsics in there. The types will have been laid out assuming no SIMD,
+ so we want to re-lay them out. */
+
+static void
+aarch64_relayout_simd_param (tree arg)
+{
+ tree argdecl = arg;
+ if (TREE_CODE (argdecl) == SSA_NAME)
+ argdecl = SSA_NAME_VAR (argdecl);
+
+ if (argdecl
+ && (TREE_CODE (argdecl) == PARM_DECL
+ || TREE_CODE (argdecl) == VAR_DECL))
+ {
+ rtx rtl = NULL_RTX;
+ rtl = DECL_RTL_IF_SET (argdecl);
+ relayout_decl (argdecl);
+ SET_DECL_RTL (argdecl, rtl);
+ }
+}
+
static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
tree exp, builtin_simd_arg *args)
@@ -895,6 +927,7 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval,
{
tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
enum machine_mode mode = insn_data[icode].operand[opc].mode;
+ aarch64_relayout_simd_param (arg);
op[opc] = expand_normal (arg);
switch (thisarg)
@@ -179,6 +179,19 @@ aarch64_pragma_target_parse (tree args, tree pop_target)
cpp_opts->warn_unused_macros = saved_warn_unused_macros;
+ /* Initialize SIMD builtins if we haven't already.
+ Set current_target_pragma to NULL for the duration so that
+ the builtin initialization code doesn't try to tag the functions
+ being built with the attributes specified by any current pragma, thus
+ going into an infinite recursion. */
+ if (TARGET_SIMD)
+ {
+ tree saved_current_target_pragma = current_target_pragma;
+ current_target_pragma = NULL;
+ aarch64_init_simd_builtins ();
+ current_target_pragma = saved_current_target_pragma;
+ }
+
return ret;
}
@@ -382,6 +382,8 @@ extern bool aarch64_madd_needs_nop (rtx_insn *);
extern void aarch64_final_prescan_insn (rtx_insn *);
extern void aarch64_reset_previous_fndecl (void);
extern void aarch64_cpu_cpp_builtins (cpp_reader *);
+extern void aarch64_init_simd_builtins (void);
+extern void aarch64_relayout_simd_types (void);
extern void aarch64_register_pragmas (void);
extern bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
@@ -8466,6 +8466,18 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
if (ret)
{
aarch64_override_options_internal (&global_options);
+ /* Initialize SIMD builtins if we haven't already.
+ Set current_target_pragma to NULL for the duration so that
+ the builtin initialization code doesn't try to tag the functions
+ being built with the attributes specified by any current pragma, thus
+ going into an infinite recursion. */
+ if (TARGET_SIMD)
+ {
+ tree saved_current_target_pragma = current_target_pragma;
+ current_target_pragma = NULL;
+ aarch64_init_simd_builtins ();
+ current_target_pragma = saved_current_target_pragma;
+ }
new_target = build_target_option_node (&global_options);
}
else
@@ -8485,7 +8497,6 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
}
cl_target_option_restore (&global_options, &cur_target);
-
if (old_optimize != new_optimize)
cl_optimization_restore (&global_options,
TREE_OPTIMIZATION (old_optimize));
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=thunderx+nofp" } */
+
+#include "arm_neon.h"
+
+/* Unless we do something about re-laying out the SIMD builtin types
+ this testcase ICEs during expansion of the crypto builtin. */
+
+__attribute__((target("cpu=cortex-a57+crypto")))
+uint32x4_t
+test_vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
+{
+ return vsha1cq_u32 (hash_abcd, hash_e, wk);
+}
+
+/* This one should be compiled for thunderx with no fp. */
+int
+foo (int a)
+{
+ return a + 5;
+}