===================================================================
@@ -630,7 +630,8 @@
void tree_function_versioning (tree, tree, VEC (ipa_replace_map_p,gc)*,
bool, bitmap, bool, bitmap, basic_block);
void tree_elem_fn_versioning (tree, tree, VEC (ipa_replace_map_p,gc)*,
- bool, bitmap, bool, bitmap, basic_block, int);
+ bool, bitmap, bool, bitmap, basic_block, int,
+ bool);
bool cgraph_process_new_functions (void);
void cgraph_process_same_body_aliases (void);
void fixup_same_cpp_alias_visibility (symtab_node node, symtab_node target, tree alias);
===================================================================
@@ -6234,6 +6234,9 @@
tree build_call_list (tree return_type, tree fn, tree arglist);
tree build_function_linkage_variant (tree ttype,
enum function_linkage linkage);
+bool is_elem_fn (tree);
+enum elem_fn_parm_type find_elem_fn_parm_type (gimple, tree, tree*);
+void elem_fn_create_fn (tree) __attribute__((weak));
/* Functional interface to the builtin functions. */
===================================================================
@@ -226,15 +226,23 @@
static bool
cgraph_decide_is_function_needed (struct cgraph_node *node, tree decl)
{
+ bool is_cloned_elem_func = false;
/* If the user told us it is used, then it must be so. */
if (node->symbol.force_output)
return true;
+ /* When an elemental function is cloned, elem_fn_already_cloned is set to
+ true on the clone; for all other functions it is initialized to zero.
+ So, if it is a cloned elemental function, we output it without questioning.  */
+ if (DECL_STRUCT_FUNCTION (decl))
+ is_cloned_elem_func = DECL_STRUCT_FUNCTION (decl)->elem_fn_already_cloned;
+
/* Double check that no one output the function into assembly file
early. */
gcc_checking_assert (!DECL_ASSEMBLER_NAME_SET_P (decl)
|| (node->thunk.thunk_p || node->same_body_alias)
- || !TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)));
+ || !TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))
+ || is_cloned_elem_func);
/* Keep constructors, destructors and virtual functions. */
===================================================================
@@ -195,6 +195,8 @@
#define notify_zc_intrinsic_fndecl cilk_trees[NOTIFY_ZC_INTRINSIC]
#define notify_intrinsic_fndecl cilk_trees[NOTIFY_INTRINSIC]
+/* Maximum number of entries in each of the elemental-function arrays.  */
+#define MAX_VARS 50
typedef struct zca_data_t
{
@@ -205,7 +207,41 @@
struct zca_data_t *ptr_next;
} zca_data;
+/* These are the different mask options.  The values start at 12345 so that
+ * they are easy to differentiate during debugging.  */
+enum mask_options {
+ USE_MASK = 12345,
+ USE_NOMASK,
+ USE_BOTH
+};
+/* this data structure will hold all the data from the vector attribute */
+typedef struct
+{
+ char *proc_type;
+ enum mask_options mask;
+ int vectorlength[MAX_VARS];
+ int no_vlengths;
+ char *uniform_vars[MAX_VARS];
+ int no_uvars;
+ int uniform_location[MAX_VARS]; /* their location in parm list */
+ char *linear_vars[MAX_VARS];
+ int linear_steps[MAX_VARS];
+ int linear_location[MAX_VARS]; /* their location in parm list */
+ int no_lvars;
+ int private_location[MAX_VARS]; /* parm not in uniform or linear list */
+ int no_pvars;
+ char *func_prefix;
+ int total_no_args;
+} elem_fn_info;
+
+/* this data structure will hold all the arguments in the function */
+typedef struct {
+ tree induction_var;
+ tree arguments;
+ tree return_var;
+} fn_vect_elements;
+
/* Offset of fields in the Cilk frame descriptor.
Index is same as for cilk_trees. If the index
does not correspond to a field of the Cilk frame
@@ -270,6 +306,14 @@
extern void debug_zca_data (void);
extern zca_data *get_zca_entry (int);
extern void insert_in_zca_table (zca_data);
-extern bool is_elem_fn (tree);
+extern bool is_elem_fn (tree);
extern tree find_elem_fn_name (tree, tree, tree);
+extern void elem_fn_create_fn (tree);
+extern char *find_processor_code (elem_fn_info *);
+extern char *find_vlength_code (elem_fn_info *);
+extern tree rename_elem_fn (tree, const char *);
+extern char *find_suffix (elem_fn_info *, bool);
+extern enum elem_fn_parm_type find_elem_fn_parm_type (gimple, tree, tree *);
+extern tree find_elem_fn_name (tree, tree, tree);
+elem_fn_info *extract_elem_fn_values (tree);
#endif /* GCC_CILK_H */
===================================================================
@@ -1,9 +1,10 @@
/* This file is part of the Intel(R) Cilk(TM) Plus support
- This file contains the functions for Elemental functions.
+ This file contains C/C++ specific functions for elemental
+ functions.
Copyright (C) 2012 Free Software Foundation, Inc.
Written by Balaji V. Iyer <balaji.v.iyer@intel.com>,
- Intel Corporation
+ Intel Corporation
Many Thanks to Karthik Kumar for advice on the basic technique
about cloning functions.
@@ -29,7 +30,8 @@
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
-#include "rtl.h"
+#include "langhooks.h"
+#include "cilk.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
@@ -40,8 +42,8 @@
#include "tree-dump.h"
#include "tree-pass.h"
#include "timevar.h"
-#include "cfgloop.h"
#include "flags.h"
+#include "c-tree.h"
#include "tree-inline.h"
#include "cgraph.h"
#include "ipa-prop.h"
@@ -52,196 +54,12 @@
#include "intl.h"
#include "vec.h"
-#define MAX_VARS 50
-enum mask_options {
- USE_MASK = 12345,
- USE_NOMASK,
- USE_BOTH
-};
-
-typedef struct
-{
- char *proc_type;
- enum mask_options mask;
- int vectorlength[MAX_VARS];
- int no_vlengths;
- char *uniform_vars[MAX_VARS];
- int no_uvars;
- int uniform_location[MAX_VARS]; /* their location in parm list */
- char *linear_vars[MAX_VARS];
- int linear_steps[MAX_VARS];
- int linear_location[MAX_VARS]; /* their location in parm list */
- int no_lvars;
- int private_location[MAX_VARS]; /* parm not in uniform or linear list */
- int no_pvars;
- char *func_prefix;
- int total_no_args;
-} elem_fn_info;
-
-static elem_fn_info *extract_elem_fn_values (tree);
static tree create_optimize_attribute (int);
static tree create_processor_attribute (elem_fn_info *, tree *);
+static tree elem_fn_build_array (tree base_var, tree index);
-/* this is an helper function for find_elem_fn_param_type */
-static enum elem_fn_parm_type
-find_elem_fn_parm_type_1 (tree fndecl, int parm_no, tree *step_size)
-{
- int ii = 0;
- elem_fn_info *elem_fn_values;
- elem_fn_values = extract_elem_fn_values (fndecl);
- if (!elem_fn_values)
- return TYPE_NONE;
-
- for (ii = 0; ii < elem_fn_values->no_lvars; ii++)
- if (elem_fn_values->linear_location[ii] == parm_no)
- {
- if (step_size != NULL)
- *step_size = build_int_cst (integer_type_node,
- elem_fn_values->linear_steps[ii]);
- return TYPE_LINEAR;
- }
-
- for (ii = 0; ii < elem_fn_values->no_uvars; ii++)
- if (elem_fn_values->uniform_location[ii] == parm_no)
- return TYPE_UNIFORM;
-
- return TYPE_NONE;
-}
-
-
-/* this function will return the type of a parameter in elemental function.
- The choices are UNIFORM or LINEAR. */
-enum elem_fn_parm_type
-find_elem_fn_parm_type (gimple stmt, tree op, tree *step_size)
-{
- tree fndecl, parm = NULL_TREE;
- int ii, nargs;
- enum elem_fn_parm_type return_type = TYPE_NONE;
-
- if (gimple_code (stmt) != GIMPLE_CALL)
- return TYPE_NONE;
-
- fndecl = gimple_call_fndecl (stmt);
- gcc_assert (fndecl);
-
- nargs = gimple_call_num_args (stmt);
-
- for (ii = 0; ii < nargs; ii++)
- {
- parm = gimple_call_arg (stmt, ii);
- if (op == parm)
- {
- return_type = find_elem_fn_parm_type_1 (fndecl, ii, step_size);
- return return_type;
- }
- }
- return return_type;
-}
-
-/* this function will concatinate the suffix to the existing function decl */
-static tree
-rename_elem_fn (tree decl, const char *suffix)
-{
- int length = 0;
- const char *fn_name = IDENTIFIER_POINTER (DECL_NAME (decl));
- char *new_fn_name;
- tree new_decl = NULL_TREE;
-
- if (!suffix || !fn_name)
- return decl;
- else
- new_decl = decl;
-
- length = strlen (fn_name) + strlen (suffix) + 1;
- new_fn_name = (char *)xmalloc (length);
- strcpy (new_fn_name, fn_name);
- strcat (new_fn_name, suffix);
-
- DECL_NAME (new_decl) = get_identifier (new_fn_name);
- return new_decl;
-}
-
-/* this function will check to see if the node is part of an function that
- * needs to be converted to its vector equivalent. */
-bool
-is_elem_fn (tree fndecl)
-{
- tree ii_tree;
-
- for (ii_tree = DECL_ATTRIBUTES (fndecl); ii_tree;
- ii_tree = TREE_CHAIN (ii_tree))
- {
- tree ii_value = TREE_PURPOSE (ii_tree);
- if (TREE_CODE (ii_value) == IDENTIFIER_NODE
- && !strcmp (IDENTIFIER_POINTER (ii_value), "vector"))
- return true;
- }
-
- /* If we are here, then we didn't find a vector keyword, so it is false */
- return false;
-}
-
-/* This function will find the appropriate processor code in the function
- * mangling vector function
- */
-static char *
-find_processor_code (elem_fn_info *elem_fn_values)
-{
- if (!elem_fn_values || !elem_fn_values->proc_type)
- return xstrdup ("B");
-
- if (!strcmp (elem_fn_values->proc_type, "pentium_4"))
- return xstrdup ("B");
- else if (!strcmp (elem_fn_values->proc_type, "pentium4_sse3"))
- return xstrdup ("D");
- else if (!strcmp (elem_fn_values->proc_type, "core2_duo_ssse3"))
- return xstrdup ("E");
- else if (!strcmp (elem_fn_values->proc_type, "core2_duo_sse_4_1"))
- return xstrdup ("F");
- else if (!strcmp (elem_fn_values->proc_type, "core_i7_sse4_2"))
- return xstrdup ("H");
- else
- gcc_unreachable ();
-
- return NULL; /* should never get here */
-}
-
-/* this function will return vectorlength, if specified, in string format -OR-
- * it will give the default vector length for the specified architecture. */
-static char *
-find_vlength_code (elem_fn_info *elem_fn_values)
-{
- char *vlength_code = (char *) xmalloc (sizeof (char) * 10);
- if (!elem_fn_values)
- {
- sprintf (vlength_code, "4");
- return vlength_code;
- }
-
- memset (vlength_code, 10, 0);
-
- if (elem_fn_values->no_vlengths != 0)
- sprintf(vlength_code,"%d", elem_fn_values->vectorlength[0]);
- else
- {
- if (!strcmp (elem_fn_values->proc_type, "pentium_4"))
- sprintf (vlength_code,"4");
- else if (!strcmp (elem_fn_values->proc_type, "pentium4_sse3"))
- sprintf (vlength_code, "4");
- else if (!strcmp (elem_fn_values->proc_type, "core2_duo_ssse3"))
- sprintf (vlength_code, "4");
- else if (!strcmp (elem_fn_values->proc_type, "core2_duo_sse_4_1"))
- sprintf (vlength_code, "4");
- else if (!strcmp (elem_fn_values->proc_type, "core_i7_sse4_2"))
- sprintf (vlength_code, "4");
- else
- gcc_unreachable ();
- }
- return vlength_code;
-}
-
/* This function will create the appropriate __target__ attribute for the
* processor */
static tree
@@ -356,7 +174,16 @@
VEC(tree,gc) *opt_vec = VEC_alloc (tree,gc, 4);
char optimization[2];
optimization[0] = 'O';
- sprintf(&optimization[1], "%1d", option);
+
+ if (option == 3)
+ optimization[1] = '3';
+ else if (option == 2)
+ optimization[1] = '2';
+ else if (option == 1)
+ optimization[1] = '1';
+ else if (option == 0)
+ optimization[1] = '0';
+
VEC_safe_push (tree, gc, opt_vec, build_string (2, optimization));
opt_attr = build_tree_list_vec (opt_vec);
VEC_truncate (tree, opt_vec, 0);
@@ -364,343 +191,363 @@
return opt_attr;
}
-/* this function will find the appropriate mangling suffix for the vector
- * function */
-static char *
-find_suffix (elem_fn_info *elem_fn_values, bool masked)
+
+/* this function will store return expression to a temporary var */
+static tree
+replace_return_with_new_var (tree *tp, int *walk_subtrees, void *data)
{
- char *suffix = (char*)xmalloc (100);
- char tmp_str[10];
- int arg_number, ii_pvar, ii_uvar, ii_lvar;
- strcpy (suffix, "._simdsimd_");
- strcat (suffix, find_processor_code (elem_fn_values));
- strcat (suffix, find_vlength_code (elem_fn_values));
- if (masked)
- strcpy (suffix, "m");
- else
- strcat (suffix, "n");
+ tree mod_expr = NULL_TREE, return_var = NULL_TREE, ret_expr = NULL_TREE;
+
+ if (!*tp)
+ return NULL_TREE;
- for (arg_number = 1; arg_number <= elem_fn_values->total_no_args;
- arg_number++)
+ if (TREE_CODE (*tp) == RETURN_EXPR)
{
- for (ii_lvar = 0; ii_lvar < elem_fn_values->no_lvars; ii_lvar++)
+ return_var = (tree) data;
+ ret_expr = TREE_OPERAND (TREE_OPERAND (*tp, 0), 1);
+ mod_expr = build2 (MODIFY_EXPR, TREE_TYPE (return_var), return_var,
+ ret_expr);
+ *tp = mod_expr;
+ *walk_subtrees = 0;
+ }
+ return NULL_TREE;
+}
+
+
+/* This function will create a vector access as a array access */
+static tree
+elem_fn_build_array (tree base_var, tree index)
+{
+ return build_array_ref (UNKNOWN_LOCATION, base_var, index);
+}
+
+/* this function will replace all vector references with array references. */
+static tree
+replace_array_ref_for_vec (tree *tp, int *walk_subtrees, void *data)
+{
+ tree ii_var;
+ fn_vect_elements *func_data;
+ if (!*tp)
+ return NULL_TREE;
+
+ if (TREE_CODE (*tp) == VAR_DECL || TREE_CODE (*tp) == PARM_DECL)
+ {
+ func_data = (fn_vect_elements *) data;
+ gcc_assert (func_data->induction_var);
+ for (ii_var = func_data->arguments; ii_var; ii_var = DECL_CHAIN (ii_var))
{
- if (elem_fn_values->linear_location[ii_lvar] == arg_number)
+ if (DECL_NAME (ii_var) == DECL_NAME (*tp))
{
- strcat (suffix, "_l");
- sprintf(tmp_str, "%d", elem_fn_values->linear_steps[ii_lvar]);
- strcat (suffix, tmp_str);
+ *tp = elem_fn_build_array (*tp, func_data->induction_var);
+ *walk_subtrees = 0;
+ return NULL_TREE;
}
}
- for (ii_uvar = 0; ii_uvar < elem_fn_values->no_uvars; ii_uvar++)
+ if (func_data->return_var &&
+ (DECL_NAME (*tp) == DECL_NAME (func_data->return_var)))
{
- if (elem_fn_values->uniform_location[ii_uvar] == arg_number)
- strcat (suffix, "_s1");
+ *tp = elem_fn_build_array (*tp, func_data->induction_var);
+ *walk_subtrees = 0;
}
- for (ii_pvar = 0; ii_pvar < elem_fn_values->no_pvars; ii_pvar++)
- {
- if (elem_fn_values->private_location[ii_pvar] == arg_number)
- strcat (suffix, "_v1");
- }
- }
- return suffix;
+ }
+ return NULL_TREE;
}
-tree
-find_elem_fn_name (tree old_fndecl,
- tree vectype_out ATTRIBUTE_UNUSED,
- tree vectype_in ATTRIBUTE_UNUSED)
+/* this function will move return values to the end of the function */
+static void
+fix_elem_fn_return_value (tree fndecl, tree induction_var)
{
- elem_fn_info *elem_fn_values = NULL;
- tree new_fndecl = NULL_TREE, arg_type = NULL_TREE;
- char *suffix = NULL;
+ fn_vect_elements data;
+ tree old_fndecl;
+ tree new_var, new_var_init, new_body = NULL_TREE;
+ tree ret_expr, ret_stmt = NULL_TREE;
+ if (!fndecl || !DECL_SAVED_TREE (fndecl))
+ return;
+
+ if (TREE_TYPE (DECL_RESULT (fndecl)) == void_type_node)
+ return;
+
+ old_fndecl = current_function_decl;
+ push_cfun (DECL_STRUCT_FUNCTION (fndecl));
+ current_function_decl = fndecl;
- elem_fn_values = extract_elem_fn_values (old_fndecl);
-
- if (elem_fn_values)
+ new_var = create_tmp_var (TREE_TYPE (DECL_RESULT (fndecl)), "elem_fn_ret");
+ new_var_init =
+ build_vector_from_val
+ (TREE_TYPE (DECL_RESULT (fndecl)),
+ build_zero_cst (TREE_TYPE (TREE_TYPE (DECL_RESULT (fndecl)))));
+ DECL_INITIAL (new_var) = new_var_init;
+ walk_tree (&DECL_SAVED_TREE (fndecl), replace_return_with_new_var,
+ (void *)new_var, NULL);
+ data.return_var = new_var;
+ data.arguments = DECL_ARGUMENTS (fndecl);
+ data.induction_var = induction_var;
+
+ walk_tree (&DECL_SAVED_TREE (fndecl), replace_array_ref_for_vec,
+ (void *) &data, NULL);
+ ret_expr = build2 (MODIFY_EXPR, TREE_TYPE (new_var),
+ DECL_RESULT (fndecl), new_var);
+
+ ret_stmt = build1 (RETURN_EXPR, TREE_TYPE (ret_expr), ret_expr);
+ if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
{
- if (elem_fn_values->no_vlengths > 0)
+
+ if (!BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)))
+ ;
+ else if (TREE_CODE (BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl))) !=
+ TREE_LIST)
{
- if (elem_fn_values->vectorlength[0] ==
- (int)TYPE_VECTOR_SUBPARTS (vectype_out))
- suffix = find_suffix (elem_fn_values, false);
- else
- return NULL_TREE;
+ append_to_statement_list_force
+ (BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)), &new_body);
+ append_to_statement_list_force (ret_stmt, &new_body);
}
else
- return NULL_TREE;
+ {
+ new_body = BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl));
+ append_to_statement_list_force (ret_stmt, &new_body);
+ }
+ BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = new_body;
}
- else
+
+ pop_cfun ();
+ current_function_decl = old_fndecl;
+ return;
+}
+
+/* Wrap FNDECL's body in a VLENGTH-iteration loop; return the induction var.  */
+static tree
+add_elem_fn_loop (tree fndecl, int vlength)
+{
+ tree exit_label = NULL_TREE, if_label = NULL_TREE, body_label = NULL_TREE;
+ tree fn_body, loop = NULL_TREE, loop_var, mod_var, incr_expr, cond_expr;
+ tree cmp_expr, old_fndecl;
+
+ if (!fndecl)
+ return NULL_TREE;
+
+ if (!DECL_SAVED_TREE (fndecl))
return NULL_TREE;
- new_fndecl = copy_node (rename_elem_fn (old_fndecl, suffix));
- TREE_TYPE (new_fndecl) = copy_node (TREE_TYPE (old_fndecl));
+ old_fndecl = current_function_decl;
+ push_cfun (DECL_STRUCT_FUNCTION (fndecl));
+ current_function_decl = fndecl;
+
+ if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
+ fn_body = BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl));
+ else
+ fn_body = DECL_SAVED_TREE (fndecl);
- TYPE_ARG_TYPES (TREE_TYPE (new_fndecl)) =
- copy_list (TYPE_ARG_TYPES (TREE_TYPE (new_fndecl)));
+ loop = alloc_stmt_list ();
- for (arg_type = TYPE_ARG_TYPES (TREE_TYPE (new_fndecl));
- arg_type && arg_type != void_type_node;
- arg_type = TREE_CHAIN (arg_type))
- TREE_VALUE (arg_type) = vectype_out;
+ loop_var = create_tmp_var (integer_type_node, "ii_elem_fn_vec_val");
+ mod_var = build2 (MODIFY_EXPR, void_type_node, loop_var,
+ build_int_cst (integer_type_node, 0));
+ append_to_statement_list_force (mod_var, &loop);
- if (TREE_TYPE (TREE_TYPE (new_fndecl)) != void_type_node)
- {
- TREE_TYPE (TREE_TYPE (new_fndecl)) =
- copy_node (TREE_TYPE (TREE_TYPE (new_fndecl)));
- TREE_TYPE (TREE_TYPE (new_fndecl)) = vectype_out;
- DECL_MODE (new_fndecl) = TYPE_MODE (vectype_out);
- }
+ if_label = build_decl (UNKNOWN_LOCATION, LABEL_DECL,
+ get_identifier ("if_lab"), void_type_node);
+ DECL_CONTEXT (if_label) = fndecl;
+ DECL_ARTIFICIAL (if_label) = 0;
+ DECL_IGNORED_P (if_label) = 1;
+
+ exit_label = build_decl (UNKNOWN_LOCATION, LABEL_DECL,
+ get_identifier ("exit_label"), void_type_node);
+ DECL_CONTEXT (exit_label) = fndecl;
+ DECL_ARTIFICIAL (exit_label) = 0;
+ DECL_IGNORED_P (exit_label) = 1;
+
+ body_label = build_decl (UNKNOWN_LOCATION, LABEL_DECL,
+ get_identifier ("body_label"), void_type_node);
+ DECL_CONTEXT (body_label) = fndecl;
+ DECL_ARTIFICIAL (body_label) = 0;
+ DECL_IGNORED_P (body_label) = 1;
+ append_to_statement_list_force (build1 (LABEL_EXPR, void_type_node,
+ if_label), &loop);
+ cmp_expr = build2 (LT_EXPR, boolean_type_node, loop_var,
+ build_int_cst (integer_type_node, vlength));
+ cond_expr = build3 (COND_EXPR, void_type_node, cmp_expr,
+ build1 (GOTO_EXPR, void_type_node, body_label),
+ build1 (GOTO_EXPR, void_type_node, exit_label));
+
+ append_to_statement_list_force (cond_expr, &loop);
+ append_to_statement_list_force (build1 (LABEL_EXPR, void_type_node,
+ body_label), &loop);
+ append_to_statement_list_force (fn_body, &loop);
+
+ incr_expr = build2 (MODIFY_EXPR, void_type_node, loop_var,
+ build2 (PLUS_EXPR, TREE_TYPE (loop_var), loop_var,
+ build_int_cst (integer_type_node, 1)));
+
+ append_to_statement_list_force (incr_expr, &loop);
+ append_to_statement_list_force (build1 (GOTO_EXPR, void_type_node, if_label),
+ &loop);
+ append_to_statement_list_force (build1 (LABEL_EXPR, void_type_node,
+ exit_label), &loop);
- return new_fndecl;
+ if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
+ BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = loop;
+ else
+ DECL_SAVED_TREE (fndecl) = loop;
+
+ pop_cfun ();
+ current_function_decl = old_fndecl;
+
+ return loop_var;
}
-/* this function wil create the elemental vector function node */
-static struct cgraph_node *
-create_elem_fn_nodes (struct cgraph_node *node)
+/* this function will add the mask if statement for masked clone */
+static void
+add_elem_fn_mask (tree fndecl)
{
- tree new_decl, old_decl, new_decl_name, opt_attr;
- tree proc_attr, opp_proc_attr = NULL_TREE;
- struct cgraph_node *new_node;
- elem_fn_info *elem_fn_values = NULL;
- char *suffix = NULL;
-
- old_decl = node->symbol.decl;
- new_decl = copy_node (old_decl);
- TREE_TYPE (new_decl) = copy_node (TREE_TYPE (old_decl));
- elem_fn_values = extract_elem_fn_values (old_decl);
+ tree ii_arg;
+ tree cond_expr, cmp_expr, old_fndecl;
+ tree fn_body = NULL_TREE;
- if (elem_fn_values)
+ /* Check before push_cfun: returning after it would leave cfun pushed.  */
+ if (!DECL_SAVED_TREE (fndecl))
+ return;
+ old_fndecl = current_function_decl;
+ push_cfun (DECL_STRUCT_FUNCTION (fndecl));
+ current_function_decl = fndecl;
+
+ for (ii_arg = DECL_ARGUMENTS (fndecl); DECL_CHAIN (ii_arg);
+ ii_arg = DECL_CHAIN (ii_arg))
{
- suffix = find_suffix (elem_fn_values, false);
+ ;
}
+ if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
+ fn_body = BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl));
else
- return NULL;
-
- new_decl_name = rename_elem_fn (new_decl, suffix);
+ fn_body = DECL_SAVED_TREE (fndecl); /* not sure if we ever get here */
- SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME(new_decl_name));
- SET_DECL_RTL (new_decl, NULL);
- TREE_SYMBOL_REFERENCED (DECL_NAME (new_decl_name)) = 1;
-
- new_node = cgraph_copy_node_for_versioning (node, new_decl, NULL, NULL);
- new_node->symbol.externally_visible = node->symbol.externally_visible;
- new_node->lowered = true;
+ gcc_assert (DECL_NAME (ii_arg) == get_identifier ("__elem_fn_mask"));
- tree_elem_fn_versioning (old_decl, new_decl, NULL, false, NULL, false, NULL,
- NULL, elem_fn_values->vectorlength[0]);
- cgraph_call_function_insertion_hooks (new_node);
- DECL_STRUCT_FUNCTION (new_decl)->elem_fn_already_cloned = true;
- DECL_STRUCT_FUNCTION (new_decl)->curr_properties = cfun->curr_properties;
- DECL_ATTRIBUTES (cfun->decl) =
- remove_attribute ("vector", DECL_ATTRIBUTES (cfun->decl));
- DECL_ATTRIBUTES (new_node->symbol.decl) =
- remove_attribute ("vector", DECL_ATTRIBUTES (new_node->symbol.decl));
+ cmp_expr = fold_build2 (NE_EXPR, TREE_TYPE (ii_arg), ii_arg,
+ build_int_cst (TREE_TYPE (TREE_TYPE (ii_arg)), 0));
+ cond_expr = fold_build3 (COND_EXPR, void_type_node, cmp_expr, fn_body,
+ build_empty_stmt (UNKNOWN_LOCATION));
- proc_attr = create_processor_attribute (elem_fn_values, &opp_proc_attr);
+ if (TREE_CODE (DECL_SAVED_TREE (fndecl)) == BIND_EXPR)
+ BIND_EXPR_BODY (DECL_SAVED_TREE (fndecl)) = cond_expr;
+ else
+ DECL_SAVED_TREE (fndecl) = cond_expr;
+
+ pop_cfun ();
+ current_function_decl = old_fndecl;
- if (proc_attr)
- decl_attributes (&new_node->symbol.decl, proc_attr, 0);
- if (opp_proc_attr)
- decl_attributes (&cfun->decl, opp_proc_attr, 0);
+ return;
+
+}
- opt_attr = create_optimize_attribute (3); /* this will turn vectorizer on */
- if (opt_attr)
- decl_attributes (&new_node->symbol.decl, opt_attr, 0);
+/* this function will do hacks necessary to recognize the cloned function */
+static void
+cg_hacks (tree fndecl)
+{
+ const tree outer = current_function_decl;
+ struct function *f = DECL_STRUCT_FUNCTION (fndecl);
+
+ f->curr_properties = cfun->curr_properties;
+ push_cfun (f);
+ current_function_decl = fndecl;
- return new_node;
+ cgraph_add_new_function (fndecl, false);
+ cgraph_finalize_function (fndecl, true);
+
+ pop_cfun ();
+ current_function_decl = outer;
+
+ return;
}
-/* This function will extact the vector attribute and store the data in the
- * elem_fn_info structure.
- */
-static elem_fn_info *
-extract_elem_fn_values (tree decl)
+/* this function will create clones for function marked with vector attribute */
+void
+elem_fn_create_fn (tree fndecl)
{
+ tree new_masked_fn = NULL_TREE, new_unmasked_fn = NULL_TREE;
+ tree induction_var = NULL_TREE;
elem_fn_info *elem_fn_values = NULL;
- int x = 0; /* this is a dummy variable */
- int arg_number = 0, ii = 0;
- tree ii_tree, jj_tree, kk_tree;
- tree decl_attr = DECL_ATTRIBUTES (decl);
-
- if (!decl_attr)
- return NULL;
+ char *masked_suffix = NULL, *unmasked_suffix = NULL;
+ tree proc_attr = NULL_TREE, opp_proc_attr = NULL_TREE, opt_attr = NULL_TREE;
+ if (!fndecl)
+ return;
- elem_fn_values = (elem_fn_info *)xmalloc (sizeof (elem_fn_info));
- gcc_assert (elem_fn_values);
+ elem_fn_values = extract_elem_fn_values (fndecl);
- elem_fn_values->proc_type = NULL;
- elem_fn_values->mask = USE_BOTH;
- elem_fn_values->no_vlengths = 0;
- elem_fn_values->no_uvars = 0;
- elem_fn_values->no_lvars = 0;
-
+ if (!elem_fn_values)
+ return;
- for (ii_tree = decl_attr; ii_tree; ii_tree = TREE_CHAIN (ii_tree))
+ if (elem_fn_values->mask == USE_MASK)
+ masked_suffix = find_suffix (elem_fn_values, true);
+ else if (elem_fn_values->mask == USE_NOMASK)
+ unmasked_suffix = find_suffix (elem_fn_values, false);
+ else
{
- tree ii_purpose = TREE_PURPOSE (ii_tree);
- tree ii_value = TREE_VALUE (ii_tree);
- if (TREE_CODE (ii_purpose) == IDENTIFIER_NODE
- && !strcmp (IDENTIFIER_POINTER (ii_purpose), "vector"))
- {
- for (jj_tree = ii_value; jj_tree;
- jj_tree = TREE_CHAIN (jj_tree))
- {
- tree jj_value = TREE_VALUE (jj_tree);
- tree jj_purpose = TREE_PURPOSE (jj_value);
- if (TREE_CODE (jj_purpose) == IDENTIFIER_NODE
- && !strcmp (IDENTIFIER_POINTER (jj_purpose), "processor"))
- {
- for (kk_tree = TREE_VALUE (jj_value); kk_tree;
- kk_tree = TREE_CHAIN (kk_tree))
- {
- tree kk_value = TREE_VALUE (kk_tree);
- if (TREE_CODE (kk_value) == STRING_CST)
- elem_fn_values->proc_type =
- xstrdup (TREE_STRING_POINTER (kk_value));
- }
- }
- else if (TREE_CODE (jj_purpose) == IDENTIFIER_NODE
- && !strcmp (IDENTIFIER_POINTER (jj_purpose),
- "vectorlength"))
- {
- for (kk_tree = TREE_VALUE (jj_value); kk_tree;
- kk_tree = TREE_CHAIN (kk_tree))
- {
- tree kk_value = TREE_VALUE (kk_tree);
- if (TREE_CODE (kk_value) == INTEGER_CST)
- {
- x = elem_fn_values->no_vlengths;
- elem_fn_values->vectorlength[x] =
- (int) TREE_INT_CST_LOW (kk_value);
- elem_fn_values->no_vlengths++;
- }
- }
- }
- else if (TREE_CODE (jj_purpose) == IDENTIFIER_NODE
- && !strcmp (IDENTIFIER_POINTER (jj_purpose), "uniform"))
- {
- for (kk_tree = TREE_VALUE (jj_value); kk_tree;
- kk_tree = TREE_CHAIN (kk_tree))
- {
- tree kk_value = TREE_VALUE (kk_tree);
- elem_fn_values->uniform_vars[elem_fn_values->no_uvars] =
- xstrdup (TREE_STRING_POINTER (kk_value));
- elem_fn_values->no_uvars++;
- }
- }
- else if (TREE_CODE (jj_purpose) == IDENTIFIER_NODE
- && !strcmp (IDENTIFIER_POINTER (jj_purpose), "linear"))
- {
- for (kk_tree = TREE_VALUE (jj_value); kk_tree;
- kk_tree = TREE_CHAIN (kk_tree))
- {
- tree kk_value = TREE_VALUE (kk_tree);
- elem_fn_values->linear_vars[elem_fn_values->no_lvars] =
- xstrdup (TREE_STRING_POINTER (kk_value));
- kk_tree = TREE_CHAIN (kk_tree);
- kk_value = TREE_VALUE (kk_tree);
- elem_fn_values->linear_steps[elem_fn_values->no_lvars] =
- TREE_INT_CST_LOW (kk_value);
- elem_fn_values->no_lvars++;
- }
- }
- else if (TREE_CODE (jj_purpose) == IDENTIFIER_NODE
- && !strcmp (IDENTIFIER_POINTER (jj_purpose), "mask"))
- elem_fn_values->mask = USE_MASK;
- else if (TREE_CODE (jj_purpose) == IDENTIFIER_NODE
- && !strcmp (IDENTIFIER_POINTER (jj_purpose), "nomask"))
- elem_fn_values->mask = USE_NOMASK;
- }
- }
+ masked_suffix = find_suffix (elem_fn_values, true);
+ unmasked_suffix = find_suffix (elem_fn_values, false);
}
- for (ii_tree = DECL_ARGUMENTS (decl); ii_tree; ii_tree = DECL_CHAIN (ii_tree))
+ if (masked_suffix)
{
- arg_number++;
- bool already_found = false;
- for (ii = 0; ii < elem_fn_values->no_uvars; ii++)
- {
- if (DECL_NAME (ii_tree)
- && !strcmp (IDENTIFIER_POINTER (DECL_NAME (ii_tree)),
- elem_fn_values->uniform_vars[ii]))
- {
- already_found = true;
- elem_fn_values->uniform_location[ii] = arg_number;
- }
- }
- for (ii = 0; ii < elem_fn_values->no_lvars; ii++)
- {
- if (DECL_NAME (ii_tree)
- && !strcmp (IDENTIFIER_POINTER (DECL_NAME (ii_tree)),
- elem_fn_values->linear_vars[ii]))
- {
- if (already_found)
- fatal_error
- ("variable %s defined in both uniform and linear clause",
- elem_fn_values->linear_vars[ii]);
- else
- {
- already_found = true;
- elem_fn_values->linear_location[ii] = arg_number;
- }
- }
- }
- if (!already_found) /* this means this variable is a private */
- elem_fn_values->private_location[elem_fn_values->no_pvars++] =
- arg_number;
- }
+ new_masked_fn = copy_node (fndecl);
+ new_masked_fn = rename_elem_fn (new_masked_fn, masked_suffix);
+ SET_DECL_RTL (new_masked_fn, NULL);
+ TREE_SYMBOL_REFERENCED (DECL_NAME (new_masked_fn)) = 1;
+ tree_elem_fn_versioning (fndecl, new_masked_fn, NULL, false, NULL, false,
+ NULL, NULL, elem_fn_values->vectorlength[0],
+ true);
+ proc_attr = create_processor_attribute (elem_fn_values, &opp_proc_attr);
+ if (proc_attr)
+ decl_attributes (&new_masked_fn, proc_attr, 0);
+ if (opp_proc_attr)
+ decl_attributes (&fndecl, opp_proc_attr, 0);
+
+ opt_attr = create_optimize_attribute (3); /* will turn vectorizer on */
+ if (opt_attr)
+ decl_attributes (&new_masked_fn, opt_attr, 0);
- elem_fn_values->total_no_args = arg_number;
-
- return elem_fn_values;
-}
-
-/* Entry point function for creating the vector elemental function */
-static unsigned int
-create_elem_vec_fn (void)
-{
- struct cgraph_node *ii_node, *copied_node;
-
- FOR_EACH_DEFINED_FUNCTION (ii_node)
- {
- tree node_decl = ii_node->symbol.decl;
- if (is_elem_fn (node_decl)
- && DECL_STRUCT_FUNCTION (node_decl)
- && !DECL_STRUCT_FUNCTION (node_decl)->elem_fn_already_cloned)
- {
- copied_node = create_elem_fn_nodes (ii_node);
- if (DECL_RTL (ii_node->symbol.decl))
- {
- SET_DECL_RTL (copied_node->symbol.decl,
- copy_rtx (DECL_RTL (ii_node->symbol.decl)));
- XEXP (DECL_RTL (copied_node->symbol.decl), 0) =
- gen_rtx_SYMBOL_REF
- (GET_MODE (XEXP (DECL_RTL (ii_node->symbol.decl), 0)),
- IDENTIFIER_POINTER (DECL_NAME (copied_node->symbol.decl)));
- }
-
- }
+ DECL_ATTRIBUTES (new_masked_fn) =
+ remove_attribute ("vector", DECL_ATTRIBUTES (new_masked_fn));
+
+ add_elem_fn_mask (new_masked_fn);
+ induction_var = add_elem_fn_loop (new_masked_fn,
+ elem_fn_values->vectorlength[0]);
+ fix_elem_fn_return_value (new_masked_fn, induction_var);
+ cg_hacks (new_masked_fn);
+ SET_DECL_ASSEMBLER_NAME (new_masked_fn, DECL_NAME (new_masked_fn));
+ if (DECL_STRUCT_FUNCTION (new_masked_fn))
+ DECL_STRUCT_FUNCTION (new_masked_fn)->elem_fn_already_cloned = true;
}
- return 0;
-}
-
-
-struct gimple_opt_pass pass_elem_fn =
- {
+ if (unmasked_suffix)
{
- GIMPLE_PASS,
- "tree_elem_fn", /* name */
- 0, /* gate */
- create_elem_vec_fn, /* execute */
- NULL, /* sub */
- NULL, /* next */
- 0, /* static_pass_number */
- TV_NONE, /* tv_id */
- PROP_gimple_any| PROP_cfg, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- TODO_verify_flow, /* todo_flags_finish */
+ new_unmasked_fn = copy_node (fndecl);
+ new_unmasked_fn = rename_elem_fn (new_unmasked_fn, unmasked_suffix);
+ SET_DECL_RTL (new_unmasked_fn, NULL);
+ TREE_SYMBOL_REFERENCED (DECL_NAME (new_unmasked_fn)) = 1;
+ tree_elem_fn_versioning (fndecl, new_unmasked_fn, NULL, false, NULL,
+ false, NULL, NULL,
+ elem_fn_values->vectorlength[0], false);
+ proc_attr = create_processor_attribute (elem_fn_values, &opp_proc_attr);
+ if (proc_attr)
+ decl_attributes (&new_unmasked_fn, proc_attr, 0);
+ if (opp_proc_attr)
+ decl_attributes (&fndecl, opp_proc_attr, 0);
+
+ opt_attr = create_optimize_attribute (3); /* will turn vectorizer on */
+ if (opt_attr)
+ decl_attributes (&new_unmasked_fn, opt_attr, 0);
+
+ DECL_ATTRIBUTES (new_unmasked_fn) =
+ remove_attribute ("vector", DECL_ATTRIBUTES (new_unmasked_fn));
+ induction_var = add_elem_fn_loop (new_unmasked_fn,
+ elem_fn_values->vectorlength[0]);
+ fix_elem_fn_return_value (new_unmasked_fn, induction_var);
+ cg_hacks (new_unmasked_fn);
+ SET_DECL_ASSEMBLER_NAME (new_unmasked_fn, DECL_NAME (new_unmasked_fn));
+ if (DECL_STRUCT_FUNCTION (new_unmasked_fn))
+ DECL_STRUCT_FUNCTION (new_unmasked_fn)->elem_fn_already_cloned = true;
}
- };
+ DECL_ATTRIBUTES (fndecl) = remove_attribute ("vector",
+ DECL_ATTRIBUTES (fndecl));
+ free (elem_fn_values);
+ return;
+}
===================================================================
@@ -0,0 +1,461 @@
+/* This file is part of the Intel(R) Cilk(TM) Plus support
+ This file contains the language independent functions for
+ Elemental functions.
+
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ Written by Balaji V. Iyer <balaji.v.iyer@intel.com>,
+ Intel Corporation
+
+ Many Thanks to Karthik Kumar for advice on the basic technique
+ about cloning functions.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "langhooks.h"
+#include "cilk.h"
+#include "tm_p.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "output.h"
+#include "c-family/c-common.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "tree-pass.h"
+#include "timevar.h"
+#include "flags.h"
+#include "c-tree.h"
+#include "tree-inline.h"
+#include "cgraph.h"
+#include "ipa-prop.h"
+#include "opts.h"
+#include "tree-iterator.h"
+#include "toplev.h"
+#include "options.h"
+#include "intl.h"
+#include "vec.h"
+#include "cilk.h"
+
+/* NOTE(review): MAX_VARS is not referenced anywhere else in this file;
+ presumably it mirrors the array bound used by elem_fn_info in cilk.h --
+ confirm or remove. */
+#define MAX_VARS 50
+
+/* Forward declarations of the externally visible functions that are defined
+ later in this file. */
+enum elem_fn_parm_type find_elem_fn_parm_type (gimple, tree, tree *);
+bool is_elem_fn (tree);
+tree find_elem_fn_name (tree old_fndecl, tree vectype_out, tree vectype_in);
+elem_fn_info *extract_elem_fn_values (tree decl);
+
+/* Return the one-letter processor code that is embedded in the mangled name
+   of a vector (elemental) function.  The result is a freshly xstrdup'ed
+   string which the caller owns.  When ELEM_FN_VALUES is NULL or names no
+   processor, the code "B" is used.  An unrecognized processor name is an
+   internal error.  */
+char *
+find_processor_code (elem_fn_info *elem_fn_values)
+{
+  static const struct
+  {
+    const char *proc_name;
+    const char *code;
+  } proc_codes[] = {
+    { "pentium_4", "B" },
+    { "pentium4_sse3", "D" },
+    { "core2_duo_ssse3", "E" },
+    { "core2_duo_sse_4_1", "F" },
+    { "core_i7_sse4_2", "H" }
+  };
+  size_t idx;
+
+  /* No processor clause at all: fall back to the default code.  */
+  if (elem_fn_values == NULL || elem_fn_values->proc_type == NULL)
+    return xstrdup ("B");
+
+  for (idx = 0; idx < sizeof (proc_codes) / sizeof (proc_codes[0]); idx++)
+    if (strcmp (elem_fn_values->proc_type, proc_codes[idx].proc_name) == 0)
+      return xstrdup (proc_codes[idx].code);
+
+  gcc_unreachable ();
+
+  return NULL; /* should never get here */
+}
+
+/* Return the vector length, as a decimal string, that is embedded in the
+   mangled name of a vector (elemental) function.  The result is allocated
+   with xmalloc and owned by the caller.
+
+   If ELEM_FN_VALUES carries at least one vectorlength, the first one is
+   used.  Otherwise the default length "4" is returned; every processor
+   known to this file uses that default.  A NULL ELEM_FN_VALUES or a missing
+   processor clause also yields "4", matching find_processor_code, which
+   accepts both.  An unrecognized processor name is an internal error.  */
+char *
+find_vlength_code (elem_fn_info *elem_fn_values)
+{
+  static const char *const known_procs[] = {
+    "pentium_4",
+    "pentium4_sse3",
+    "core2_duo_ssse3",
+    "core2_duo_sse_4_1",
+    "core_i7_sse4_2"
+  };
+  /* Large enough for any value printed with "%d" plus the terminator.  */
+  enum { VLENGTH_CODE_SIZE = 16 };
+  char *vlength_code = (char *) xmalloc (VLENGTH_CODE_SIZE);
+  bool known = false;
+  size_t idx;
+
+  if (elem_fn_values && elem_fn_values->no_vlengths != 0)
+    {
+      snprintf (vlength_code, VLENGTH_CODE_SIZE, "%d",
+		elem_fn_values->vectorlength[0]);
+      return vlength_code;
+    }
+
+  /* Only look at the processor name when there is one; the previous code
+     handed a NULL proc_type straight to strcmp.  */
+  if (elem_fn_values && elem_fn_values->proc_type)
+    {
+      for (idx = 0; idx < sizeof (known_procs) / sizeof (known_procs[0]);
+	   idx++)
+	if (strcmp (elem_fn_values->proc_type, known_procs[idx]) == 0)
+	  known = true;
+
+      if (!known)
+	gcc_unreachable ();
+    }
+
+  snprintf (vlength_code, VLENGTH_CODE_SIZE, "4");
+  return vlength_code;
+}
+
+
+/* Append SUFFIX to the name of DECL and return DECL.  DECL is modified in
+   place (no copy is made here), so a caller that needs to keep the original
+   name must pass in a copy.  DECL is returned untouched when SUFFIX is NULL
+   or DECL has no name.  */
+tree
+rename_elem_fn (tree decl, const char *suffix)
+{
+  const char *fn_name;
+  char *new_fn_name;
+  size_t fn_len, suffix_len;
+
+  /* Test DECL_NAME itself before IDENTIFIER_POINTER looks through it.  */
+  if (!suffix || !DECL_NAME (decl))
+    return decl;
+
+  fn_name = IDENTIFIER_POINTER (DECL_NAME (decl));
+  fn_len = strlen (fn_name);
+  suffix_len = strlen (suffix);
+
+  new_fn_name = (char *) xmalloc (fn_len + suffix_len + 1);
+  memcpy (new_fn_name, fn_name, fn_len);
+  /* The "+ 1" also copies the terminating NUL of SUFFIX.  */
+  memcpy (new_fn_name + fn_len, suffix, suffix_len + 1);
+
+  /* get_identifier stores its own copy of the text in the identifier hash
+     table, so the temporary buffer can be released right away.  */
+  DECL_NAME (decl) = get_identifier (new_fn_name);
+  free (new_fn_name);
+
+  return decl;
+}
+
+
+/* Append PIECE to the NUL-terminated string *BUF.  *CAP is the size of the
+   allocation behind *BUF; it is doubled (via xrealloc) until PIECE fits.  */
+static void
+append_suffix_piece (char **buf, size_t *cap, const char *piece)
+{
+  size_t used = strlen (*buf);
+  size_t piece_len = strlen (piece);
+
+  if (used + piece_len + 1 > *cap)
+    {
+      while (used + piece_len + 1 > *cap)
+	*cap *= 2;
+      *buf = (char *) xrealloc (*buf, *cap);
+    }
+  memcpy (*buf + used, piece, piece_len + 1);
+}
+
+/* Build the mangling suffix for the vector clone of an elemental function.
+
+   The suffix is "._simdsimd_", the processor code, the vector length code,
+   "m" (MASKED) or "n" (unmasked), and then, walking the arguments in order
+   (numbered from 1), "_l<step>" for each linear, "_s1" for each uniform and
+   "_v1" for each private argument.
+
+   ELEM_FN_VALUES may be NULL, in which case only the default header part is
+   produced.  The result is heap allocated and owned by the caller.  */
+char *
+find_suffix (elem_fn_info *elem_fn_values, bool masked)
+{
+  size_t cap = 100;
+  char *suffix = (char *) xmalloc (cap);
+  char *piece;
+  char step_str[32];
+  int arg_number, ii;
+
+  strcpy (suffix, "._simdsimd_");
+
+  /* Both helpers return heap strings; release them once appended.  */
+  piece = find_processor_code (elem_fn_values);
+  append_suffix_piece (&suffix, &cap, piece);
+  free (piece);
+
+  piece = find_vlength_code (elem_fn_values);
+  append_suffix_piece (&suffix, &cap, piece);
+  free (piece);
+
+  append_suffix_piece (&suffix, &cap, masked ? "m" : "n");
+
+  if (!elem_fn_values)
+    return suffix;
+
+  for (arg_number = 1; arg_number <= elem_fn_values->total_no_args;
+       arg_number++)
+    {
+      for (ii = 0; ii < elem_fn_values->no_lvars; ii++)
+	if (elem_fn_values->linear_location[ii] == arg_number)
+	  {
+	    snprintf (step_str, sizeof (step_str), "%d",
+		      elem_fn_values->linear_steps[ii]);
+	    append_suffix_piece (&suffix, &cap, "_l");
+	    append_suffix_piece (&suffix, &cap, step_str);
+	  }
+
+      for (ii = 0; ii < elem_fn_values->no_uvars; ii++)
+	if (elem_fn_values->uniform_location[ii] == arg_number)
+	  append_suffix_piece (&suffix, &cap, "_s1");
+
+      for (ii = 0; ii < elem_fn_values->no_pvars; ii++)
+	if (elem_fn_values->private_location[ii] == arg_number)
+	  append_suffix_piece (&suffix, &cap, "_v1");
+    }
+
+  return suffix;
+}
+
+
+/* Helper for find_elem_fn_parm_type.  Classify parameter PARM_NO of FNDECL
+   using the clauses of its vector attribute.
+
+   PARM_NO is compared directly against the locations recorded by
+   extract_elem_fn_values, which numbers parameters starting at 1.
+
+   Returns TYPE_LINEAR (and, when STEP_SIZE is non-NULL, stores the step as
+   an integer constant in *STEP_SIZE), TYPE_UNIFORM, or TYPE_NONE when the
+   parameter is in neither clause or FNDECL has no attributes.  Linear
+   clauses are checked before uniform ones.  */
+static enum elem_fn_parm_type
+find_elem_fn_parm_type_1 (tree fndecl, int parm_no, tree *step_size)
+{
+  enum elem_fn_parm_type parm_type = TYPE_NONE;
+  elem_fn_info *elem_fn_values = extract_elem_fn_values (fndecl);
+  int ii;
+
+  if (!elem_fn_values)
+    return TYPE_NONE;
+
+  for (ii = 0; ii < elem_fn_values->no_lvars && parm_type == TYPE_NONE; ii++)
+    if (elem_fn_values->linear_location[ii] == parm_no)
+      {
+	if (step_size != NULL)
+	  *step_size = build_int_cst (integer_type_node,
+				      elem_fn_values->linear_steps[ii]);
+	parm_type = TYPE_LINEAR;
+      }
+
+  for (ii = 0; ii < elem_fn_values->no_uvars && parm_type == TYPE_NONE; ii++)
+    if (elem_fn_values->uniform_location[ii] == parm_no)
+      parm_type = TYPE_UNIFORM;
+
+  /* The record was allocated for this query only; release it instead of
+     leaking one per call.  NOTE(review): the strings the record points to
+     (processor and variable names) are not released here.  */
+  free (elem_fn_values);
+
+  return parm_type;
+}
+
+
+/* Return how operand OP is passed to the elemental function called by STMT:
+   TYPE_LINEAR (with the step stored in *STEP_SIZE when STEP_SIZE is
+   non-NULL), TYPE_UNIFORM, or TYPE_NONE.
+
+   TYPE_NONE is also returned when STMT is not a call, when the callee is
+   not a known FUNCTION_DECL, or when OP is not one of the call arguments.
+   Only the first argument equal to OP is considered.  */
+enum elem_fn_parm_type
+find_elem_fn_parm_type (gimple stmt, tree op, tree *step_size)
+{
+  tree fndecl;
+  int ii, nargs;
+
+  if (gimple_code (stmt) != GIMPLE_CALL)
+    return TYPE_NONE;
+
+  /* A call without a known callee decl carries no vector attribute to
+     inspect, so it is treated as a plain call instead of an internal
+     error.  */
+  fndecl = gimple_call_fndecl (stmt);
+  if (!fndecl)
+    return TYPE_NONE;
+
+  nargs = gimple_call_num_args (stmt);
+
+  for (ii = 0; ii < nargs; ii++)
+    if (gimple_call_arg (stmt, ii) == op)
+      /* Call arguments are indexed from 0, whereas the parameter locations
+	 recorded by extract_elem_fn_values are numbered from 1.  */
+      return find_elem_fn_parm_type_1 (fndecl, ii + 1, step_size);
+
+  return TYPE_NONE;
+}
+/* Build and return a FUNCTION_DECL for the unmasked vector clone of the
+   elemental function OLD_FNDECL that works on vectors of type VECTYPE_OUT.
+
+   NULL_TREE is returned when OLD_FNDECL has no attributes, specifies no
+   vectorlength, or its first vectorlength differs from the number of
+   elements in VECTYPE_OUT.
+
+   The returned decl is a renamed copy of OLD_FNDECL whose function type has
+   every argument type, and the return type unless it is void, replaced by
+   VECTYPE_OUT.  OLD_FNDECL itself is left untouched.  VECTYPE_IN is
+   unused.  */
+tree
+find_elem_fn_name (tree old_fndecl, tree vectype_out,
+		   tree vectype_in ATTRIBUTE_UNUSED)
+{
+  elem_fn_info *elem_fn_values = extract_elem_fn_values (old_fndecl);
+  tree new_fndecl = NULL_TREE, arg_type = NULL_TREE;
+  char *suffix = NULL;
+
+  if (!elem_fn_values)
+    return NULL_TREE;
+
+  if (elem_fn_values->no_vlengths <= 0
+      || (elem_fn_values->vectorlength[0]
+	  != (int) TYPE_VECTOR_SUBPARTS (vectype_out)))
+    {
+      free (elem_fn_values);
+      return NULL_TREE;
+    }
+
+  suffix = find_suffix (elem_fn_values, false);
+  free (elem_fn_values);
+
+  /* rename_elem_fn changes the decl it is given, so copy first; renaming
+     OLD_FNDECL itself would change the name of the scalar function.  */
+  new_fndecl = rename_elem_fn (copy_node (old_fndecl), suffix);
+  free (suffix);
+
+  TREE_TYPE (new_fndecl) = copy_node (TREE_TYPE (old_fndecl));
+
+  TYPE_ARG_TYPES (TREE_TYPE (new_fndecl)) =
+    copy_list (TYPE_ARG_TYPES (TREE_TYPE (new_fndecl)));
+
+  /* Stop at the void entry that terminates a prototyped argument list so
+     that the terminator is not turned into an extra vector argument.  */
+  for (arg_type = TYPE_ARG_TYPES (TREE_TYPE (new_fndecl));
+       arg_type && TREE_VALUE (arg_type) != void_type_node;
+       arg_type = TREE_CHAIN (arg_type))
+    TREE_VALUE (arg_type) = vectype_out;
+
+  if (TREE_TYPE (TREE_TYPE (new_fndecl)) != void_type_node)
+    {
+      TREE_TYPE (TREE_TYPE (new_fndecl)) = vectype_out;
+      DECL_MODE (new_fndecl) = TYPE_MODE (vectype_out);
+    }
+
+  return new_fndecl;
+}
+
+/* Collect the clauses of the "vector" attribute of DECL into a newly
+   allocated elem_fn_info record and return it; the caller owns the record.
+   NULL is returned when DECL has no attributes at all.
+
+   Recognized clauses are processor, vectorlength, uniform, linear, mask and
+   nomask.  After the clauses are read, the parameters of DECL are walked
+   (numbered from 1) and the position of every uniform and linear variable
+   is recorded; every parameter named in neither clause is recorded as
+   private.  A parameter named in both a uniform and a linear clause is a
+   fatal error.  A uniform or linear name that matches no parameter keeps
+   the location -1.  */
+elem_fn_info *
+extract_elem_fn_values (tree decl)
+{
+  elem_fn_info *elem_fn_values = NULL;
+  int arg_number = 0, ii = 0;
+  tree ii_tree, jj_tree, kk_tree;
+  tree decl_attr = DECL_ATTRIBUTES (decl);
+
+  if (!decl_attr)
+    return NULL;
+
+  /* xcalloc zeroes the whole record, so no counter or slot is ever read
+     uninitialized.  */
+  elem_fn_values = (elem_fn_info *) xcalloc (1, sizeof (elem_fn_info));
+
+  elem_fn_values->proc_type = NULL;
+  elem_fn_values->mask = USE_BOTH;
+  elem_fn_values->no_vlengths = 0;
+  elem_fn_values->no_uvars = 0;
+  elem_fn_values->no_lvars = 0;
+  elem_fn_values->no_pvars = 0;
+  elem_fn_values->total_no_args = 0;
+
+  for (ii_tree = decl_attr; ii_tree; ii_tree = TREE_CHAIN (ii_tree))
+    {
+      tree ii_purpose = TREE_PURPOSE (ii_tree);
+      tree ii_value = TREE_VALUE (ii_tree);
+
+      if (TREE_CODE (ii_purpose) != IDENTIFIER_NODE
+	  || strcmp (IDENTIFIER_POINTER (ii_purpose), "vector"))
+	continue;
+
+      for (jj_tree = ii_value; jj_tree; jj_tree = TREE_CHAIN (jj_tree))
+	{
+	  tree jj_value = TREE_VALUE (jj_tree);
+	  tree jj_purpose = TREE_PURPOSE (jj_value);
+	  const char *clause;
+
+	  if (TREE_CODE (jj_purpose) != IDENTIFIER_NODE)
+	    continue;
+	  clause = IDENTIFIER_POINTER (jj_purpose);
+
+	  if (!strcmp (clause, "processor"))
+	    {
+	      for (kk_tree = TREE_VALUE (jj_value); kk_tree;
+		   kk_tree = TREE_CHAIN (kk_tree))
+		{
+		  tree kk_value = TREE_VALUE (kk_tree);
+		  if (TREE_CODE (kk_value) == STRING_CST)
+		    elem_fn_values->proc_type =
+		      xstrdup (TREE_STRING_POINTER (kk_value));
+		}
+	    }
+	  else if (!strcmp (clause, "vectorlength"))
+	    {
+	      for (kk_tree = TREE_VALUE (jj_value); kk_tree;
+		   kk_tree = TREE_CHAIN (kk_tree))
+		{
+		  tree kk_value = TREE_VALUE (kk_tree);
+		  if (TREE_CODE (kk_value) == INTEGER_CST)
+		    {
+		      elem_fn_values->vectorlength[elem_fn_values->no_vlengths]
+			= (int) TREE_INT_CST_LOW (kk_value);
+		      elem_fn_values->no_vlengths++;
+		    }
+		}
+	    }
+	  else if (!strcmp (clause, "uniform"))
+	    {
+	      for (kk_tree = TREE_VALUE (jj_value); kk_tree;
+		   kk_tree = TREE_CHAIN (kk_tree))
+		{
+		  tree kk_value = TREE_VALUE (kk_tree);
+		  elem_fn_values->uniform_vars[elem_fn_values->no_uvars] =
+		    xstrdup (TREE_STRING_POINTER (kk_value));
+		  /* -1 matches no parameter number until a real one is
+		     found below.  */
+		  elem_fn_values->uniform_location[elem_fn_values->no_uvars]
+		    = -1;
+		  elem_fn_values->no_uvars++;
+		}
+	    }
+	  else if (!strcmp (clause, "linear"))
+	    {
+	      /* The list holds (name, step) pairs.  */
+	      for (kk_tree = TREE_VALUE (jj_value); kk_tree;
+		   kk_tree = TREE_CHAIN (kk_tree))
+		{
+		  tree kk_value = TREE_VALUE (kk_tree);
+		  elem_fn_values->linear_vars[elem_fn_values->no_lvars] =
+		    xstrdup (TREE_STRING_POINTER (kk_value));
+		  elem_fn_values->linear_location[elem_fn_values->no_lvars]
+		    = -1;
+
+		  kk_tree = TREE_CHAIN (kk_tree);
+		  if (!kk_tree)
+		    {
+		      /* Name without a step node.  NOTE(review): a step of
+			 1 is assumed here -- confirm against the parser.  */
+		      elem_fn_values->linear_steps[elem_fn_values->no_lvars]
+			= 1;
+		      elem_fn_values->no_lvars++;
+		      break;
+		    }
+		  kk_value = TREE_VALUE (kk_tree);
+		  elem_fn_values->linear_steps[elem_fn_values->no_lvars] =
+		    TREE_INT_CST_LOW (kk_value);
+		  elem_fn_values->no_lvars++;
+		}
+	    }
+	  else if (!strcmp (clause, "mask"))
+	    elem_fn_values->mask = USE_MASK;
+	  else if (!strcmp (clause, "nomask"))
+	    elem_fn_values->mask = USE_NOMASK;
+	}
+    }
+
+  for (ii_tree = DECL_ARGUMENTS (decl); ii_tree;
+       ii_tree = DECL_CHAIN (ii_tree))
+    {
+      bool already_found = false;
+
+      arg_number++;
+      for (ii = 0; ii < elem_fn_values->no_uvars; ii++)
+	{
+	  if (DECL_NAME (ii_tree)
+	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (ii_tree)),
+			  elem_fn_values->uniform_vars[ii]))
+	    {
+	      already_found = true;
+	      elem_fn_values->uniform_location[ii] = arg_number;
+	    }
+	}
+      for (ii = 0; ii < elem_fn_values->no_lvars; ii++)
+	{
+	  if (DECL_NAME (ii_tree)
+	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (ii_tree)),
+			  elem_fn_values->linear_vars[ii]))
+	    {
+	      if (already_found)
+		fatal_error
+		  ("variable %s defined in both uniform and linear clause",
+		   elem_fn_values->linear_vars[ii]);
+	      else
+		{
+		  already_found = true;
+		  elem_fn_values->linear_location[ii] = arg_number;
+		}
+	    }
+	}
+      if (!already_found) /* this means this variable is a private */
+	elem_fn_values->private_location[elem_fn_values->no_pvars++] =
+	  arg_number;
+    }
+
+  elem_fn_values->total_no_args = arg_number;
+
+  return elem_fn_values;
+}
+
+/* Return true when FNDECL carries an attribute named "vector", i.e. when it
+   is an elemental function that has to be converted to its vector
+   equivalent, and false otherwise.  */
+bool
+is_elem_fn (tree fndecl)
+{
+  tree attr = DECL_ATTRIBUTES (fndecl);
+
+  while (attr)
+    {
+      tree attr_name = TREE_PURPOSE (attr);
+
+      if (TREE_CODE (attr_name) == IDENTIFIER_NODE
+	  && strcmp (IDENTIFIER_POINTER (attr_name), "vector") == 0)
+	return true;
+
+      attr = TREE_CHAIN (attr);
+    }
+
+  /* The whole attribute list was searched without finding "vector".  */
+  return false;
+}
===================================================================
@@ -8360,6 +8360,13 @@
oldfn = current_function_decl;
current_function_decl = fndecl;
+
+ /* Here we check whether the function carries the vector attribute.
+ * If so, then we must clone it to masked/unmasked when appropriate.
+ */
+ if (flag_enable_cilk && is_elem_fn (fndecl))
+ elem_fn_create_fn (fndecl);
+
if (DECL_STRUCT_FUNCTION (fndecl))
push_cfun (DECL_STRUCT_FUNCTION (fndecl));
else
===================================================================
@@ -3806,7 +3806,7 @@
static inline void
elem_fn_add_local_variables (struct function *callee, struct function *caller,
copy_body_data *id, bool check_var_ann,
- int vlength)
+ int vlength ATTRIBUTE_UNUSED)
{
tree var;
unsigned ix;
@@ -3836,9 +3836,6 @@
SET_DECL_DEBUG_EXPR (new_var, tem);
}
TREE_TYPE (new_var) = copy_node (TREE_TYPE (new_var));
- TREE_TYPE (new_var) =
- build_vector_type (copy_node (TREE_TYPE (new_var)), vlength);
- DECL_GIMPLE_REG_P (new_var) = 1;
add_local_decl (caller, new_var);
}
}
@@ -4994,27 +4991,35 @@
static tree
elem_fn_copy_arguments_for_versioning (tree orig_parm, copy_body_data * id,
bitmap args_to_skip, tree *vars,
- int vlength)
+ int vlength, bool masked)
{
tree arg, *parg;
tree new_parm = NULL;
int i = 0;
-
+ tree masked_parm = NULL_TREE;
parg = &new_parm;
+ if (masked)
+ {
+ masked_parm = build_decl (UNKNOWN_LOCATION, PARM_DECL,
+ get_identifier ("__elem_fn_mask"),
+ build_vector_type (integer_type_node, vlength));
+ DECL_ARG_TYPE (masked_parm) = build_vector_type (integer_type_node,
+ vlength);
+ DECL_ARTIFICIAL (masked_parm) = 1;
+ lang_hooks.dup_lang_specific_decl (masked_parm);
+ }
for (arg = orig_parm; arg; arg = DECL_CHAIN (arg), i++)
if (!args_to_skip || !bitmap_bit_p (args_to_skip, i))
{
tree new_tree = remap_decl (arg, id);
if (TREE_CODE (new_tree) != PARM_DECL)
new_tree = id->copy_decl (arg, id);
- /* bviyer; I am using a dummy value of 4 to make sure this works */
TREE_TYPE (new_tree) = copy_node (TREE_TYPE (new_tree));
- TREE_TYPE (new_tree) =
- build_vector_type (TREE_TYPE (new_tree), vlength);
- DECL_ARG_TYPE (new_tree) =
- build_vector_type (DECL_ARG_TYPE (new_tree), vlength);
- DECL_GIMPLE_REG_P (new_tree) = 1;
+ TREE_TYPE (new_tree) = build_vector_type (TREE_TYPE (new_tree),
+ vlength);
+ DECL_ARG_TYPE (new_tree) = build_vector_type (DECL_ARG_TYPE (new_tree),
+ vlength);
lang_hooks.dup_lang_specific_decl (new_tree);
*parg = new_tree;
parg = &DECL_CHAIN (new_tree);
@@ -5031,6 +5036,14 @@
DECL_CHAIN (var) = *vars;
*vars = var;
}
+ if (masked && masked_parm)
+ {
+ for (arg = new_parm; DECL_CHAIN (arg); arg = DECL_CHAIN(arg))
+ ;
+
+ DECL_CONTEXT (masked_parm) = DECL_CONTEXT (arg);
+ DECL_CHAIN (arg) = masked_parm;
+ }
return new_parm;
}
@@ -5444,20 +5457,60 @@
return;
}
+/* Create a fresh struct function for NEW_FNDECL and seed it with the
+ properties of the struct function of CALLEE_FNDECL that are preserved
+ when cloning an elemental function. The new struct function is made
+ current only for the duration of this call; it is popped again before
+ returning. */
+static void
+initialize_elem_fn_cfun (tree new_fndecl, tree callee_fndecl)
+{
+ struct function *src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl);
+
+ /* Get clean struct function. */
+ push_struct_function (new_fndecl);
+
+ /* We will rebuild these, so just sanity check that they are empty. */
+ gcc_assert (VALUE_HISTOGRAMS (cfun) == NULL);
+ gcc_assert (cfun->local_decls == NULL);
+ gcc_assert (cfun->cfg == NULL);
+ gcc_assert (cfun->decl == new_fndecl);
+
+ /* Copy items we preserve during cloning. */
+ cfun->static_chain_decl = src_cfun->static_chain_decl;
+ cfun->nonlocal_goto_save_area = src_cfun->nonlocal_goto_save_area;
+ cfun->function_end_locus = src_cfun->function_end_locus;
+ /* PROP_loops is masked out of the copied properties. */
+ cfun->curr_properties = src_cfun->curr_properties & ~PROP_loops;
+ cfun->last_verified = src_cfun->last_verified;
+ cfun->va_list_gpr_size = src_cfun->va_list_gpr_size;
+ cfun->va_list_fpr_size = src_cfun->va_list_fpr_size;
+ cfun->has_nonlocal_label = src_cfun->has_nonlocal_label;
+ cfun->stdarg = src_cfun->stdarg;
+ cfun->after_inlining = src_cfun->after_inlining;
+ cfun->can_throw_non_call_exceptions
+ = src_cfun->can_throw_non_call_exceptions;
+ cfun->returns_struct = src_cfun->returns_struct;
+ cfun->returns_pcc_struct = src_cfun->returns_pcc_struct;
+ cfun->after_tree_profile = src_cfun->after_tree_profile;
+
+ /* EH data is only initialized, not copied, and only when the source has
+ some. */
+ if (src_cfun->eh)
+ init_eh_for_function ();
+
+ /* Likewise the SSA data is set up from scratch when the source function
+ has it, and the new function is flagged as being in SSA form. */
+ if (src_cfun->gimple_df)
+ {
+ init_tree_ssa (cfun);
+ cfun->gimple_df->in_ssa_p = true;
+ init_ssa_operands (cfun);
+ }
+ pop_cfun ();
+}
+
void
tree_elem_fn_versioning (tree old_decl, tree new_decl,
VEC(ipa_replace_map_p,gc)* tree_map,
bool update_clones, bitmap args_to_skip,
- bool skip_return, bitmap blocks_to_copy,
- basic_block new_entry, int vlength)
+ bool skip_return, bitmap blocks_to_copy ATTRIBUTE_UNUSED,
+ basic_block new_entry ATTRIBUTE_UNUSED, int vlength, bool masked)
{
- struct cgraph_node *old_version_node;
- struct cgraph_node *new_version_node;
copy_body_data id;
tree p;
unsigned i;
struct ipa_replace_map *replace_info;
- basic_block old_entry_block, bb;
VEC (gimple, heap) *init_stmts = VEC_alloc (gimple, heap, 10);
tree old_current_function_decl = current_function_decl;
@@ -5466,21 +5519,7 @@
gcc_assert (TREE_CODE (old_decl) == FUNCTION_DECL
&& TREE_CODE (new_decl) == FUNCTION_DECL);
DECL_POSSIBLY_INLINED (old_decl) = 1;
-
- old_version_node = cgraph_get_node (old_decl);
- gcc_checking_assert (old_version_node);
- new_version_node = cgraph_get_node (new_decl);
- gcc_checking_assert (new_version_node);
-
- if (TREE_TYPE (TREE_TYPE (old_decl)) != void_type_node)
- {
- TREE_TYPE (TREE_TYPE (new_decl)) =
- copy_node (TREE_TYPE (TREE_TYPE (old_decl)));
- TREE_TYPE (TREE_TYPE (new_decl)) =
- build_vector_type (TREE_TYPE (TREE_TYPE (new_decl)), vlength);
- }
-
/* Copy over debug args. */
if (DECL_HAS_DEBUG_ARGS_P (old_decl))
{
@@ -5502,9 +5541,6 @@
(*debug_hooks->outlining_inline_function) (old_decl);
DECL_ARTIFICIAL (new_decl) = 1;
- DECL_ABSTRACT_ORIGIN (new_decl) = DECL_ORIGIN (old_decl);
- DECL_FUNCTION_PERSONALITY (new_decl) = DECL_FUNCTION_PERSONALITY (old_decl);
-
/* Prepare the data structures for the tree copy. */
memset (&id, 0, sizeof (id));
@@ -5515,24 +5551,10 @@
id.debug_map = NULL;
id.src_fn = old_decl;
id.dst_fn = new_decl;
- id.src_node = old_version_node;
- id.dst_node = new_version_node;
+ id.src_node = NULL;
+ id.dst_node = NULL;
id.src_cfun = DECL_STRUCT_FUNCTION (old_decl);
- if (id.src_node->ipa_transforms_to_apply)
- {
- VEC(ipa_opt_pass,heap) * old_transforms_to_apply =
- id.dst_node->ipa_transforms_to_apply;
- unsigned int i;
- id.dst_node->ipa_transforms_to_apply =
- VEC_copy (ipa_opt_pass, heap, id.src_node->ipa_transforms_to_apply);
- for (i = 0; i < VEC_length (ipa_opt_pass, old_transforms_to_apply); i++)
- VEC_safe_push (ipa_opt_pass, heap, id.dst_node->ipa_transforms_to_apply,
- VEC_index (ipa_opt_pass,
- old_transforms_to_apply,
- i));
- }
-
id.copy_decl = copy_decl_no_change;
id.transform_call_graph_edges
= update_clones ? CB_CGE_MOVE_CLONES : CB_CGE_MOVE;
@@ -5541,12 +5563,8 @@
id.transform_lang_insert_block = NULL;
current_function_decl = new_decl;
- old_entry_block = ENTRY_BLOCK_PTR_FOR_FUNCTION
- (DECL_STRUCT_FUNCTION (old_decl));
- initialize_cfun (new_decl, old_decl,
- old_entry_block->count);
- DECL_STRUCT_FUNCTION (new_decl)->gimple_df->ipa_pta
- = id.src_cfun->gimple_df->ipa_pta;
+
+ initialize_elem_fn_cfun (new_decl, old_decl);
push_cfun (DECL_STRUCT_FUNCTION (new_decl));
/* Copy the function's static chain. */
@@ -5602,7 +5620,8 @@
if (DECL_ARGUMENTS (old_decl) != NULL_TREE)
DECL_ARGUMENTS (new_decl) =
elem_fn_copy_arguments_for_versioning (DECL_ARGUMENTS (old_decl), &id,
- args_to_skip, &vars, vlength);
+ args_to_skip, &vars,
+ vlength, masked);
DECL_INITIAL (new_decl) = remap_blocks (DECL_INITIAL (id.src_fn), &id);
BLOCK_SUPERCONTEXT (DECL_INITIAL (new_decl)) = new_decl;
@@ -5629,7 +5648,6 @@
{
tree old_name;
DECL_RESULT (new_decl) = remap_decl (DECL_RESULT (old_decl), &id);
- /* bviyer; we are just using 4 for vectorlength just to see if it works */
if (TREE_TYPE (DECL_RESULT (new_decl)) != void_type_node)
{
TREE_TYPE (DECL_RESULT (new_decl)) =
@@ -5638,6 +5656,14 @@
DECL_MODE (DECL_RESULT (new_decl)) =
TYPE_MODE (TREE_TYPE (DECL_RESULT (new_decl)));
}
+ if (TREE_TYPE (TREE_TYPE (old_decl)) != void_type_node)
+ {
+ TREE_TYPE (new_decl) = copy_node (TREE_TYPE (old_decl));
+ TREE_TYPE (TREE_TYPE (new_decl)) =
+ copy_node (TREE_TYPE (TREE_TYPE (old_decl)));
+ TREE_TYPE (TREE_TYPE (new_decl)) =
+ build_vector_type (TREE_TYPE (TREE_TYPE (new_decl)), vlength);
+ }
lang_hooks.dup_lang_specific_decl (DECL_RESULT (new_decl));
if (gimple_in_ssa_p (id.src_cfun)
&& DECL_BY_REFERENCE (DECL_RESULT (old_decl))
@@ -5650,22 +5676,11 @@
set_default_def (DECL_RESULT (new_decl), new_name);
}
}
-
- /* Copy the Function's body. */
- copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE,
- ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, blocks_to_copy, new_entry);
-
+ walk_tree (&DECL_SAVED_TREE (new_decl), copy_tree_body_r, &id, NULL);
/* Renumber the lexical scoping (non-code) blocks consecutively. */
number_blocks (new_decl);
- /* We want to create the BB unconditionally, so that the addition of
- debug stmts doesn't affect BB count, which may in the end cause
- codegen differences. */
- bb = split_edge (single_succ_edge (ENTRY_BLOCK_PTR));
- while (VEC_length (gimple, init_stmts))
- insert_init_stmt (&id, bb, VEC_pop (gimple, init_stmts));
- update_clone_info (&id);
-
+
/* Remap the nonlocal_goto_save_area, if any. */
if (cfun->nonlocal_goto_save_area)
{
@@ -5675,49 +5690,12 @@
wi.info = &id;
walk_tree (&cfun->nonlocal_goto_save_area, remap_gimple_op_r, &wi, NULL);
}
-
+
/* Clean up. */
pointer_map_destroy (id.decl_map);
if (id.debug_map)
pointer_map_destroy (id.debug_map);
- free_dominance_info (CDI_DOMINATORS);
- free_dominance_info (CDI_POST_DOMINATORS);
- fold_marked_statements (0, id.statements_to_fold);
- pointer_set_destroy (id.statements_to_fold);
- fold_cond_expr_cond ();
- delete_unreachable_blocks_update_callgraph (&id);
- if (id.dst_node->analyzed)
- cgraph_rebuild_references ();
- update_ssa (TODO_update_ssa);
-
- /* After partial cloning we need to rescale frequencies, so they are
- within proper range in the cloned function. */
- if (new_entry)
- {
- struct cgraph_edge *e;
- rebuild_frequencies ();
-
- new_version_node->count = ENTRY_BLOCK_PTR->count;
- for (e = new_version_node->callees; e; e = e->next_callee)
- {
- basic_block bb = gimple_bb (e->call_stmt);
- e->frequency = compute_call_stmt_bb_frequency (current_function_decl,
- bb);
- e->count = bb->count;
- }
- for (e = new_version_node->indirect_calls; e; e = e->next_callee)
- {
- basic_block bb = gimple_bb (e->call_stmt);
- e->frequency = compute_call_stmt_bb_frequency (current_function_decl,
- bb);
- e->count = bb->count;
- }
- }
-
- free_dominance_info (CDI_DOMINATORS);
- free_dominance_info (CDI_POST_DOMINATORS);
-
gcc_assert (!id.debug_stmts);
VEC_free (gimple, heap, init_stmts);
pop_cfun ();
===================================================================
@@ -1126,7 +1126,7 @@
c-family/c-format.o c-family/c-gimplify.o c-family/c-lex.o \
c-family/c-omp.o c-family/c-opts.o c-family/c-pch.o \
c-family/c-ppoutput.o c-family/c-pragma.o c-family/c-pretty-print.o \
- c-family/c-semantics.o c-family/c-ada-spec.o cilk-spawn.o
+ c-family/c-semantics.o c-family/c-ada-spec.o cilk-spawn.o elem-function.o
# Language-specific object files for C and Objective C.
C_AND_OBJC_OBJS = attribs.o c-errors.o c-decl.o c-typeck.o \
@@ -1451,7 +1451,7 @@
cilk.o \
cilk-low.o \
array-notation-common.o \
- elem-function.o \
+ elem-function-common.o \
$(out_object_file) \
$(EXTRA_OBJS) \
$(host_hook_obj)
===================================================================
@@ -1311,7 +1311,6 @@
NEXT_PASS (pass_lower_vector);
NEXT_PASS (pass_early_warn_uninitialized);
NEXT_PASS (pass_rebuild_cgraph_edges);
- NEXT_PASS (pass_elem_fn);
NEXT_PASS (pass_inline_parameters);
NEXT_PASS (pass_early_inline);
NEXT_PASS (pass_all_early_optimizations);
===================================================================
@@ -1,3 +1,43 @@
+2012-05-23 Balaji V Iyer <balaji.v.iyer@intel.com>
+
+ * cgraphunit.c (cgraph_decide_is_function_needed): Added a check for
+ a cloned elemental function in the checking assert.
+ * tree-inline.c (elem_fn_add_local_variables): Marked vlength with the
+ unused attribute. Also stopped converting local variables to vector types.
+ (elem_fn_copy_arguments_for_versioning): Added a bool called masked as
+ a new parameter. Also, added a masked parameter to the cloned function
+ if this bool is set to true. Also, stopped setting arguments to
+ GIMPLE_REG.
+ (initialize_elem_fn_cfun): New function.
+ (tree_elem_fn_versioning): Removed several pieces of code pertaining to
+ maintaining a CFG. Also added a masked bool parameter to be passed into
+ several local functions. Called initialize_elem_fn_cfun function.
+ * tree-ssa.c (verify_use): Added a check if current function is
+ an elemental function.
+ * gimplify.c (gimplify_function_tree): Called function to create
+ elemental function when the function is marked appropriately.
+ * elem-function-common.c (find_processor_code): Moved function from
+ elem-function.c.
+ (find_vlength_code): Likewise.
+ (rename_elem_fn): Likewise.
+ (find_suffix): Likewise.
+ (find_elem_fn_parm_type_1): Likewise.
+ (find_elem_fn_parm_type): Likewise.
+ (find_elem_fn_name): Likewise.
+ (extract_elem_fn_values): Likewise.
+ (is_elem_fn): Likewise.
+ * elem-function.c (create_optimize_attribute): Changed sprintf to an
+ if-statement.
+ (find_suffix): Changed a string copy to string concatenation.
+ (replace_return_with_new_var): New function.
+ (elem_fn_build_array): Likewise.
+ (replace_array_ref_for_vec): Likewise.
+ (fix_elem_fn_return_value): Likewise.
+ (add_elem_fn_loop): Likewise.
+ (add_elem_fn_mask): Likewise.
+ (cg_hacks): Likewise.
+ (elem_fn_create_fn): Likewise.
+ * passes.c (init_optimization_passes): Removed elem_fn_pass.
+
2012-05-08 Balaji V. Iyer <balaji.v.iyer@intel.com>
* cilk.c (expand_builtin_cilk_detach): Added mode type for