===================================================================
@@ -6553,6 +6553,32 @@ invoke undefined behavior at runtime. W
accesses for vector subscription can be enabled with
@option{-Warray-bounds}.
+Vector shuffling is available using functions
+@code{__builtin_shuffle (vec, mask)} and
+@code{__builtin_shuffle (vec0, vec1, mask)}. Both functions construct
+a permutation of elements from one or two vectors and return a vector
+of the same type as the input vector(s). The mask is a vector of
+integer-typed elements. The size of each element of the mask must be
+the same as the size of each input vector element. The number of
+elements in input vector(s) and mask must be the same.
+
+The elements of the input vectors are numbered from left to right across
+one or both of the vectors. Each element of the mask is the index of an
+element in that numbering: in the two-vector form, indexes 0 through N-1
+select elements of the first vector and indexes N through 2N-1 select
+elements of the second vector, where N is the number of elements per
+vector. Consider the following example.
+
+@smallexample
+typedef int v4si __attribute__ ((vector_size (16)));
+
+v4si a = @{1,2,3,4@};
+v4si b = @{5,6,7,8@};
+v4si mask1 = @{0,1,1,3@};
+v4si mask2 = @{0,4,2,5@};
+v4si res;
+
+res = __builtin_shuffle (a, mask1); /* res is @{1,2,2,4@} */
+res = __builtin_shuffle (a, b, mask2); /* res is @{1,5,3,6@} */
+@end smallexample
+
You can declare variables and use them in function calls and returns, as
well as in assignments and some casts. You can specify a vector type as
a return type for a function. Vector types can also be used as function
===================================================================
@@ -2063,6 +2063,16 @@ dump_generic_node (pretty_printer *buffe
dump_generic_node (buffer, TREE_OPERAND (node, 2), spc, flags, false);
pp_string (buffer, " > ");
break;
+
+ case VEC_SHUFFLE_EXPR:
+ pp_string (buffer, " VEC_SHUFFLE_EXPR < ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (buffer, " , ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+ pp_string (buffer, " , ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 2), spc, flags, false);
+ pp_string (buffer, " > ");
+ break;
case DOT_PROD_EXPR:
pp_string (buffer, " DOT_PROD_EXPR < ");
===================================================================
@@ -6530,6 +6530,79 @@ vector_compare_rtx (tree cond, bool unsi
return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value);
}
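+/* Return true if a VEC_SHUFFLE_EXPR of mode MODE with operands V0, V1 and
+   shuffle MASK can be expanded: either the constant MASK is accepted by the
+   target's builtin_vec_perm hook, or V0 and V1 are the same operand, the
+   mask elements are as wide as the vector elements, and a vshuffle optab
+   handler exists for MODE.  */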
+bool
+expand_vec_shuffle_expr_p (enum machine_mode mode, tree v0,
+ tree v1, tree mask)
+{
+#define inner_type_size(vec) \
+ GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (vec))))
+
+ if (TREE_CODE (mask) == VECTOR_CST
+ && targetm.vectorize.builtin_vec_perm_ok (TREE_TYPE (v0), mask))
+ return true;
+
+ if (v0 != v1 || inner_type_size (v0) != inner_type_size (mask))
+ return false;
+
+ return direct_optab_handler (vshuffle_optab, mode) != CODE_FOR_nothing;
+#undef inner_type_size
+}
+
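+/* Generate RTL for a VEC_SHUFFLE_EXPR of TYPE selecting from V0 and V1
+   according to MASK, using TARGET as a hint for the result register.  A
+   constant mask is expanded through the target's builtin_vec_perm builtin
+   when one is provided; otherwise the vshuffle optab is used.  */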
+rtx
+expand_vec_shuffle_expr (tree type, tree v0, tree v1, tree mask, rtx target)
+{
+ struct expand_operand ops[4];
+ enum insn_code icode;
+ enum machine_mode mode = TYPE_MODE (type);
+ rtx rtx_v0, rtx_mask;
+
+ gcc_assert (expand_vec_shuffle_expr_p (mode, v0, v1, mask));
+
+ if (TREE_CODE (mask) == VECTOR_CST)
+ {
+ tree m_type, call;
+ tree fn = targetm.vectorize.builtin_vec_perm (TREE_TYPE (v0), &m_type);
+ rtx t;
+
+ if (!fn)
+ goto vshuffle;
+
+ if (m_type != TREE_TYPE (TREE_TYPE (mask)))
+ {
+ int units = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask));
+ tree cvt = build_vector_type (m_type, units);
+ mask = fold_convert (cvt, mask);
+ }
+
+ fn = copy_node (fn);
+ call = fold_build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fn)), fn);
+ call = build_call_nary (type /* ? */, call, 3, v0, v1, mask);
+
+ t = expand_normal (call);
+ target = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, target, t));
+ return target;
+ }
+
+vshuffle:
+ gcc_assert (v1 == v0);
+
+ icode = direct_optab_handler (vshuffle_optab, mode);
+
+ if (icode == CODE_FOR_nothing)
+ return 0;
+
+ rtx_v0 = expand_normal (v0);
+ rtx_mask = expand_normal (mask);
+
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], rtx_v0, mode);
+ create_input_operand (&ops[2], rtx_mask, mode);
+ expand_insn (icode, 3, ops);
+
+ return ops[0].value;
+}
+
/* Return insn code for TYPE, the type of a VEC_COND_EXPR. */
static inline enum insn_code
===================================================================
@@ -630,6 +630,9 @@ enum direct_optab_index
DOI_vcond,
DOI_vcondu,
+ /* Vector shuffling. */
+ DOI_vshuffle,
+
/* Block move operation. */
DOI_movmem,
@@ -695,6 +698,7 @@ typedef struct direct_optab_d *direct_op
#define reload_out_optab (&direct_optab_table[(int) DOI_reload_out])
#define vcond_optab (&direct_optab_table[(int) DOI_vcond])
#define vcondu_optab (&direct_optab_table[(int) DOI_vcondu])
+#define vshuffle_optab (&direct_optab_table[(int) DOI_vshuffle])
#define movmem_optab (&direct_optab_table[(int) DOI_movmem])
#define setmem_optab (&direct_optab_table[(int) DOI_setmem])
#define cmpstr_optab (&direct_optab_table[(int) DOI_cmpstr])
@@ -864,8 +868,15 @@ extern rtx expand_widening_mult (enum ma
/* Return tree if target supports vector operations for COND_EXPR. */
bool expand_vec_cond_expr_p (tree, enum machine_mode);
+/* Return true if target supports vector operations for VEC_SHUFFLE_EXPR.  */
+bool expand_vec_shuffle_expr_p (enum machine_mode, tree, tree, tree);
+
/* Generate code for VEC_COND_EXPR. */
extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
+
+/* Generate code for VEC_SHUFFLE_EXPR. */
+extern rtx expand_vec_shuffle_expr (tree, tree, tree, tree, rtx);
+
/* Generate code for VEC_LSHIFT_EXPR and VEC_RSHIFT_EXPR. */
extern rtx expand_vec_shift_expr (sepops, rtx);
===================================================================
@@ -253,6 +253,7 @@ static const char * const optabs[] =
"set_optab_handler (vec_realign_load_optab, $A, CODE_FOR_$(vec_realign_load_$a$))",
"set_direct_optab_handler (vcond_optab, $A, CODE_FOR_$(vcond$a$))",
"set_direct_optab_handler (vcondu_optab, $A, CODE_FOR_$(vcondu$a$))",
+ "set_direct_optab_handler (vshuffle_optab, $A, CODE_FOR_$(vshuffle$a$))",
"set_optab_handler (ssum_widen_optab, $A, CODE_FOR_$(widen_ssum$I$a3$))",
"set_optab_handler (usum_widen_optab, $A, CODE_FOR_$(widen_usum$I$a3$))",
"set_optab_handler (udot_prod_optab, $A, CODE_FOR_$(udot_prod$I$a$))",
===================================================================
@@ -0,0 +1,44 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
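+/* Check each element of VRES against the element of V0 or V1 selected by
+   the corresponding index in MASK (indexes 0..COUNT-1 select from V0,
+   COUNT..2*COUNT-1 from V1) and abort on the first mismatch.  */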
+#define shuf2compare(type, count, vres, v0, v1, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != ((vidx(type, mask, __i) < count) ? \
+ vidx(type, v0, vidx(type, mask, __i)) : \
+ vidx(type, v1, (vidx(type, mask, __i) - count)))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {5, 5,5,5,5,5,argc,7};
+ vector (8, short) v1 = {argc, 1,8,8,4,9,argc,4};
+ vector (8, short) v2;
+
+ vector (8, short) mask0 = {0,2,3,1,4,5,6,7};
+ vector (8, short) mask1 = {0,12,3,4,3,0,10,9};
+
+ vector (8, short) mask2 = {0,8,1,9,2,10,3,11};
+
+ v2 = __builtin_shuffle (v0, v1, mask0);
+ shuf2compare (short, 8, v2, v0, v1, mask0);
+
+ v2 = __builtin_shuffle (v0, v1, mask1);
+ shuf2compare (short, 8, v2, v0, v1, mask1);
+
+ v2 = __builtin_shuffle (v0, v1, mask2);
+ shuf2compare (short, 8, v2, v0, v1, mask2);
+
+ v2 = __builtin_shuffle (mask0, mask0, v0);
+ shuf2compare (short, 8, v2, mask0, mask0, v0);
+
+ return 0;
+}
===================================================================
@@ -0,0 +1,50 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define shuf2compare(type, count, vres, v0, v1, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != ((vidx(type, mask, __i) < count) ? \
+ vidx(type, v0, vidx(type, mask, __i)) : \
+ vidx(type, v1, (vidx(type, mask, __i) - count)))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+vector (8, short) __attribute__ ((noinline))
+f (vector (8, short) x, vector (8, short) y, vector (8, short) mask) {
+ return __builtin_shuffle (x, y, mask);
+}
+
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {argc, 1,2,3,4,5,6,7};
+ vector (8, short) v1 = {argc, 1,argc,3,4,5,argc,7};
+ vector (8, short) v2;
+
+ vector (8, short) mask0 = {0,2,3,1,4,5,6,7};
+ vector (8, short) mask1 = {0,12,3,4,3,0,10,9};
+ vector (8, short) mask2 = {0,8,1,9,2,10,3,11};
+
+ v2 = f (v0, v1, mask0);
+ shuf2compare (short, 8, v2, v0, v1, mask0);
+
+ v2 = f (v0, v1, mask1);
+ shuf2compare (short, 8, v2, v0, v1, mask1);
+
+ v2 = f (v0, v1, mask2);
+ shuf2compare (short, 8, v2, v0, v1, mask2);
+
+ v2 = f (mask0, mask0, v0);
+ shuf2compare (short, 8, v2, mask0, mask0, v0);
+
+ return 0;
+}
===================================================================
@@ -0,0 +1,46 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
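+/* Check each element of VRES against the element of V0 selected by the
+   corresponding index in MASK and abort on the first mismatch.  */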
+#define shufcompare(type, count, vres, v0, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != vidx(type, v0, vidx(type, mask, __i))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+int main (int argc, char *argv[]) {
+
+ vector (4, int) i0 = {argc, 1,2,3};
+ vector (4, int) i1 = {argc, 1, argc, 3};
+ vector (4, int) i2;
+
+ vector (4, int) imask = {0,3,2,1};
+
+ i2 = __builtin_shuffle (i0, i1);
+ shufcompare (int, 4, i2, i0, i1);
+
+ i2 = __builtin_shuffle (imask, i0);
+ shufcompare (int, 4, i2, imask, i0);
+
+ return 0;
+}
===================================================================
@@ -0,0 +1,36 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define shufcompare(type, count, vres, v0, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != vidx(type, v0, vidx(type, mask, __i))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+vector (8, short) __attribute__ ((noinline))
+f (vector (8, short) x, vector (8, short) mask) {
+ return __builtin_shuffle (x, mask);
+}
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {argc, 1,2,3,4,5,6,7};
+ vector (8, short) v1 = {argc, 1,argc,3,4,5,argc,7};
+ vector (8, short) v2;
+
+ vector (8, short) mask = {0,0,1,2,3,4,5,6};
+
+ v2 = f (v0, mask);
+ shufcompare (short, 8, v2, v0, mask);
+
+ v2 = f (v0, v1);
+ shufcompare (short, 8, v2, v0, v1);
+
+ return 0;
+}
===================================================================
@@ -725,6 +725,8 @@ DEF_GCC_BUILTIN (BUILT_IN_VA_ARG_
DEF_EXT_LIB_BUILTIN (BUILT_IN__EXIT, "_exit", BT_FN_VOID_INT, ATTR_NORETURN_NOTHROW_LEAF_LIST)
DEF_C99_BUILTIN (BUILT_IN__EXIT2, "_Exit", BT_FN_VOID_INT, ATTR_NORETURN_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_SHUFFLE, "shuffle", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC)
+
/* Implementing nested functions. */
DEF_BUILTIN_STUB (BUILT_IN_INIT_TRAMPOLINE, "__builtin_init_trampoline")
DEF_BUILTIN_STUB (BUILT_IN_ADJUST_TRAMPOLINE, "__builtin_adjust_trampoline")
===================================================================
@@ -9913,6 +9913,11 @@ expand_expr_real_1 (tree exp, rtx target
case VEC_COND_EXPR:
target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
return target;
+
+ case VEC_SHUFFLE_EXPR:
+ target = expand_vec_shuffle_expr (type, treeop0, treeop1, treeop2, target);
+ return target;
+
case MODIFY_EXPR:
{
===================================================================
@@ -2815,6 +2815,68 @@ build_function_call_vec (location_t loc,
&& !check_builtin_function_arguments (fundecl, nargs, argarray))
return error_mark_node;
+ /* Typecheck a builtin function which is declared with variable
+ argument list. */
+ if (fundecl && DECL_BUILT_IN (fundecl)
+ && DECL_BUILT_IN_CLASS (fundecl) == BUILT_IN_NORMAL)
+ {
+ enum built_in_function fcode = DECL_FUNCTION_CODE (fundecl);
+ if (fcode == BUILT_IN_SHUFFLE)
+ {
+          tree firstarg, mask;
+
+          if (nargs != 2 && nargs != 3)
+            {
+              error_at (loc, "__builtin_shuffle accepts 2 or 3 arguments");
+              return error_mark_node;
+            }
+
+          firstarg = VEC_index (tree, params, 0);
+          mask = VEC_index (tree, params, nargs - 1);
+
+ if (TREE_CODE (TREE_TYPE (mask)) != VECTOR_TYPE
+ || TREE_CODE (TREE_TYPE (TREE_TYPE (mask))) != INTEGER_TYPE)
+ {
+ error_at (loc, "__builtin_shuffle last argument must "
+ "be an integer vector");
+ return error_mark_node;
+ }
+
+ if (TREE_CODE (TREE_TYPE (firstarg)) != VECTOR_TYPE
+ || (nargs == 3
+ && TREE_CODE (TREE_TYPE (VEC_index (tree, params, 1)))
+ != VECTOR_TYPE))
+ {
+ error_at (loc, "__builtin_shuffle arguments must be vectors");
+ return error_mark_node;
+ }
+
+ if ((TYPE_VECTOR_SUBPARTS (TREE_TYPE (firstarg))
+ != TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)))
+ || (nargs == 3
+ && TYPE_VECTOR_SUBPARTS (
+ TREE_TYPE (VEC_index (tree, params, 1)))
+ != TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask))))
+ {
+ error_at (loc, "__builtin_shuffle number of elements of the "
+ "argument vector(s) and the mask vector should "
+ "be the same");
+ return error_mark_node;
+ }
+
+ /* Here we change the return type of the builtin function
+ from int f(...) --> t f(...) where t is a type of the
+ first argument. */
+ fundecl = copy_node (fundecl);
+ TREE_TYPE (fundecl) = build_function_type (TREE_TYPE (firstarg),
+ TYPE_ARG_TYPES (TREE_TYPE (fundecl)));
+ function = build_fold_addr_expr (fundecl);
+ result = build_call_array_loc (loc, TREE_TYPE (firstarg),
+ function, nargs, argarray);
+ return require_complete_type (result);
+ }
+ }
+
/* Check that the arguments to the function are valid. */
check_function_arguments (fntype, nargs, argarray);
@@ -6120,10 +6182,17 @@ digest_init (location_t init_loc, tree t
tree value;
bool constant_p = true;
- /* Iterate through elements and check if all constructor
+          /* Warn if the constructor has fewer elements than the vector
+             type; the missing elements will be implicitly zero.  */
+ if (CONSTRUCTOR_NELTS (inside_init)
+ < TYPE_VECTOR_SUBPARTS (TREE_TYPE (inside_init)))
+ warning_at (init_loc, 0, "vector length does not match "
+ "initializer length, zero elements "
+ "will be inserted");
+
+ /* Iterate through elements and check if all constructor
elements are *_CSTs. */
FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (inside_init), ix, value)
- if (!CONSTANT_CLASS_P (value))
+ if (!CONSTANT_CLASS_P (value))
{
constant_p = false;
break;
===================================================================
@@ -7050,6 +7050,7 @@ gimplify_expr (tree *expr_p, gimple_seq
break;
case BIT_FIELD_REF:
+ case VEC_SHUFFLE_EXPR:
{
enum gimplify_status r0, r1, r2;
===================================================================
@@ -497,6 +497,14 @@ DEFTREECODE (COND_EXPR, "cond_expr", tcc
*/
DEFTREECODE (VEC_COND_EXPR, "vec_cond_expr", tcc_expression, 3)
+/* Vector shuffle expression.  A = VEC_SHUFFLE_EXPR<v0, v1, mask>
+   means
+
+   foreach i in length (mask):
+     A[i] = mask[i] < length (v0) ? v0[mask[i]]
+                                  : v1[mask[i] - length (v0)]
+*/
+DEFTREECODE (VEC_SHUFFLE_EXPR, "vec_shuffle_expr", tcc_expression, 3)
+
/* Declare local variables, including making RTL and allocating space.
BIND_EXPR_VARS is a chain of VAR_DECL nodes for the variables.
BIND_EXPR_BODY is the body, the expression to be computed using
===================================================================
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
+#include "diagnostic.h"
/* Need to include rtl.h, expr.h, etc. for optabs. */
#include "expr.h"
@@ -432,6 +433,280 @@ type_for_widest_vector_mode (enum machin
}
}
+
+/* Build a reference to the element IDX of the vector VECT.  The
+   function returns either the element itself, a BIT_FIELD_REF, or an
+   ARRAY_REF expression.
+
+   GSI is required to insert temporary variables while building the
+   reference to the element of the vector VECT.
+
+   PTMPVEC is a pointer to a temporary variable used for caching
+   purposes.  If PTMPVEC is NULL, a new temporary variable will be
+   created.  */
+static tree
+vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
+{
+ tree type;
+ gimple asgn;
+ unsigned HOST_WIDE_INT maxval;
+ tree tmpvec;
+ tree indextype, arraytype;
+ bool need_asgn = true;
+
+ gcc_assert (TREE_CODE (TREE_TYPE (vect)) == VECTOR_TYPE);
+
+ type = TREE_TYPE (vect);
+ if (TREE_CODE (idx) == INTEGER_CST)
+ {
+ unsigned HOST_WIDE_INT index;
+
+ if (!host_integerp (idx, 1)
+ || (index = tree_low_cst (idx, 1)) > TYPE_VECTOR_SUBPARTS (type)-1)
+ return error_mark_node;
+
+ if (TREE_CODE (vect) == VECTOR_CST)
+ {
+ unsigned i;
+ tree vals = TREE_VECTOR_CST_ELTS (vect);
+ for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
+ if (i == index)
+ return TREE_VALUE (vals);
+ return error_mark_node;
+ }
+ else if (TREE_CODE (vect) == CONSTRUCTOR)
+ {
+ unsigned i;
+ VEC (constructor_elt, gc) *vals = CONSTRUCTOR_ELTS (vect);
+ constructor_elt *elt;
+
+ for (i = 0; VEC_iterate (constructor_elt, vals, i, elt); i++)
+ if (operand_equal_p (elt->index, idx, 0))
+ return elt->value;
+ return fold_convert (TREE_TYPE (type), integer_zero_node);
+ }
+ else if (TREE_CODE (vect) == SSA_NAME)
+ {
+ tree el;
+ gimple vectdef = SSA_NAME_DEF_STMT (vect);
+ if (gimple_assign_single_p (vectdef)
+ && (el = vector_element (gsi, gimple_assign_rhs1 (vectdef),
+ idx, ptmpvec))
+ != error_mark_node)
+ return el;
+ else
+ {
+ tree size = TYPE_SIZE (TREE_TYPE (type));
+ tree pos = fold_build2 (MULT_EXPR, TREE_TYPE (idx),
+ idx, size);
+ return fold_build3 (BIT_FIELD_REF, TREE_TYPE (type),
+ vect, size, pos);
+ }
+ }
+ else
+ return error_mark_node;
+ }
+
+ if (!ptmpvec)
+ tmpvec = create_tmp_var (TREE_TYPE (vect), "vectmp");
+ else if (!*ptmpvec)
+ tmpvec = *ptmpvec = create_tmp_var (TREE_TYPE (vect), "vectmp");
+ else
+ {
+ tmpvec = *ptmpvec;
+ need_asgn = false;
+ }
+
+ if (need_asgn)
+ {
+ TREE_ADDRESSABLE (tmpvec) = 1;
+ asgn = gimple_build_assign (tmpvec, vect);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+ }
+
+ maxval = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vect)) -1;
+ indextype = build_index_type (size_int (maxval));
+ arraytype = build_array_type (TREE_TYPE (type), indextype);
+
+ return build4 (ARRAY_REF, TREE_TYPE (type),
+ build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
+ idx, NULL_TREE, NULL_TREE);
+}
+
+/* Lower the built-in vector shuffle function.  The call can have two
+   or three arguments.
+
+   With two arguments, __builtin_shuffle (v0, mask), the lowered form
+   is {v0[mask[0]], v0[mask[1]], ...}.  MASK and V0 must have the same
+   number of elements.
+
+   With three arguments, __builtin_shuffle (v0, v1, mask), the lowered
+   form is:
+   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0] - len(v0)], ...}
+   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
+   same number of elements.  */
+static void
+lower_builtin_shuffle (gimple_stmt_iterator *gsi, location_t loc)
+{
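+/* Emit a __builtin_trap before the current statement, split the block
+   after it, and replace the shuffle call with an assignment of VEC0 to
+   its LHS, then return from the lowering routine.  */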
+#define TRAP_RETURN(new_stmt, stmt, gsi, vec0) \
+do { \
+ new_stmt = gimple_build_call (built_in_decls[BUILT_IN_TRAP], 0); \
+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); \
+ split_block (gimple_bb (new_stmt), new_stmt); \
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), vec0); \
+ gsi_replace (gsi, new_stmt, false); \
+ return; \
+} while (0)
+
+ gimple stmt = gsi_stmt (*gsi);
+ unsigned numargs = gimple_call_num_args (stmt);
+ tree mask = gimple_call_arg (stmt, numargs - 1);
+ tree vec0 = gimple_call_arg (stmt, 0);
+ tree vec1 = gimple_call_arg (stmt, 1);
+ unsigned els = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask));
+ tree type0 = TREE_TYPE (TREE_TYPE (vec0));
+ VEC(constructor_elt,gc) *v = NULL;
+ tree vectype, constr;
+ gimple new_stmt;
+ tree vec0tmp = NULL_TREE, masktmp = NULL_TREE;
+
+ if (expand_vec_shuffle_expr_p (TYPE_MODE (TREE_TYPE (vec0)), vec0, vec1, mask))
+ {
+ tree t;
+
+ t = gimplify_build3 (gsi, VEC_SHUFFLE_EXPR, TREE_TYPE (vec0),
+ vec0, vec1, mask);
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), t);
+ gsi_replace (gsi, new_stmt, false);
+
+ return;
+ }
+
+  if (numargs == 2)
+    {
+      unsigned i;
+
+ v = VEC_alloc (constructor_elt, gc, els);
+ for (i = 0; i < els; i++)
+ {
+ tree idxval, vecel, t;
+
+ idxval = vector_element (gsi, mask, size_int (i), &masktmp);
+ if (idxval == error_mark_node)
+ {
+              warning_at (loc, 0, "invalid shuffling mask index %i", i);
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ vecel = vector_element (gsi, vec0, idxval, &vec0tmp);
+ if (vecel == error_mark_node)
+ {
+              warning_at (loc, 0, "invalid shuffling arguments");
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ t = force_gimple_operand_gsi (gsi, vecel, true,
+ NULL_TREE, true, GSI_SAME_STMT);
+ CONSTRUCTOR_APPEND_ELT (v, size_int (i), t);
+ }
+ }
+ else if (numargs == 3)
+ {
+ unsigned i;
+ tree var = create_tmp_var (type0, "vecel");
+ tree vec1tmp = NULL_TREE;
+
+ v = VEC_alloc (constructor_elt, gc, els);
+ for (i = 0; i < els; i++)
+ {
+ tree idxval, idx1val, cond, elval0, elval1, condexpr, t, ssatmp;
+ tree vec0el, vec1el;
+ gimple asgn;
+
+ idxval = vector_element (gsi, mask, size_int (i), &masktmp);
+ if (idxval == error_mark_node)
+ {
+              warning_at (loc, 0, "invalid shuffling mask index %i", i);
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ if (TREE_CODE (idxval) == INTEGER_CST)
+ {
+ if (tree_int_cst_lt (idxval, size_int (els)))
+ {
+ vec0el = vector_element (gsi, vec0, idxval, &vec0tmp);
+ t = force_gimple_operand_gsi (gsi, vec0el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ }
+ else if (tree_int_cst_lt (idxval, size_int (2*els)))
+ {
+ idx1val = fold_build2 (MINUS_EXPR, TREE_TYPE (idxval),
+ idxval, build_int_cst (TREE_TYPE (idxval), els));
+
+ vec1el = vector_element (gsi, vec1, idx1val, &vec1tmp);
+ t = force_gimple_operand_gsi (gsi, vec1el, true,
+ NULL_TREE, true, GSI_SAME_STMT);
+ }
+ else
+ {
+                  warning_at (loc, 0, "invalid shuffling mask index %i", i);
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+ }
+ else
+ {
+ idx1val = fold_build2 (MINUS_EXPR, TREE_TYPE (idxval),
+ idxval, build_int_cst (TREE_TYPE (idxval), els));
+ idx1val = force_gimple_operand_gsi (gsi, idx1val,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+              cond = build2 (GT_EXPR, boolean_type_node, idxval,
+                             build_int_cst (TREE_TYPE (idxval), els - 1));
+
+ vec0el = vector_element (gsi, vec0, idxval, &vec0tmp);
+ if (vec0el == error_mark_node)
+ {
+                  warning_at (loc, 0, "invalid shuffling arguments");
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ elval0 = force_gimple_operand_gsi (gsi, vec0el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+
+ vec1el = vector_element (gsi, vec1, idx1val, &vec1tmp);
+ if (vec1el == error_mark_node)
+ {
+                  warning_at (loc, 0, "invalid shuffling arguments");
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ elval1 = force_gimple_operand_gsi (gsi, vec1el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+
+              condexpr = fold_build3 (COND_EXPR, type0, cond,
+                                      elval1, elval0);
+
+              t = force_gimple_operand_gsi (gsi, condexpr, true,
+                                            NULL_TREE, true, GSI_SAME_STMT);
+ }
+
+ asgn = gimple_build_assign (var, t);
+ ssatmp = make_ssa_name (var, asgn);
+ gimple_assign_set_lhs (asgn, ssatmp);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+ CONSTRUCTOR_APPEND_ELT (v, size_int (i), ssatmp);
+ }
+ }
+
+ vectype = build_vector_type (type0, els);
+ constr = build_constructor (vectype, v);
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), constr);
+ gsi_replace (gsi, new_stmt, false);
+}
+
/* Process one statement. If we identify a vector operation, expand it. */
static void
@@ -445,6 +720,13 @@ expand_vector_operations_1 (gimple_stmt_
enum gimple_rhs_class rhs_class;
tree new_rhs;
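+  /* Calls to __builtin_shuffle are lowered here, either into a
+     VEC_SHUFFLE_EXPR assignment or into a CONSTRUCTOR of the selected
+     elements.  */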
+ if (gimple_call_builtin_p (stmt, BUILT_IN_SHUFFLE))
+ {
+ lower_builtin_shuffle (gsi, gimple_location (stmt));
+ gimple_set_modified (gsi_stmt (*gsi), true);
+ update_stmt (gsi_stmt (*gsi));
+ }
+
if (gimple_code (stmt) != GIMPLE_ASSIGN)
return;
@@ -612,10 +894,11 @@ expand_vector_operations_1 (gimple_stmt_
/* Use this to lower vector operations introduced by the vectorizer,
if it may need the bit-twiddling tricks implemented in this file. */
+
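+/* Gate for the -O0 lowering pass: when optimizing, the veclower2 pass
+   below lowers the remaining vector operations instead.  */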
static bool
-gate_expand_vector_operations (void)
+gate_expand_vector_operations_noop (void)
{
- return flag_tree_vectorize != 0;
+ return optimize == 0;
}
static unsigned int
@@ -648,7 +931,7 @@ struct gimple_opt_pass pass_lower_vector
{
GIMPLE_PASS,
"veclower", /* name */
- 0, /* gate */
+ gate_expand_vector_operations_noop, /* gate */
expand_vector_operations, /* execute */
NULL, /* sub */
NULL, /* next */
@@ -660,7 +943,8 @@ struct gimple_opt_pass pass_lower_vector
0, /* todo_flags_start */
TODO_update_ssa /* todo_flags_finish */
| TODO_verify_ssa
- | TODO_verify_stmts | TODO_verify_flow
+ | TODO_verify_stmts | TODO_verify_flow
+ | TODO_cleanup_cfg
}
};
@@ -669,7 +953,7 @@ struct gimple_opt_pass pass_lower_vector
{
GIMPLE_PASS,
"veclower2", /* name */
- gate_expand_vector_operations, /* gate */
+ 0, /* gate */
expand_vector_operations, /* execute */
NULL, /* sub */
NULL, /* next */
@@ -682,6 +966,7 @@ struct gimple_opt_pass pass_lower_vector
TODO_update_ssa /* todo_flags_finish */
| TODO_verify_ssa
| TODO_verify_stmts | TODO_verify_flow
+ | TODO_cleanup_cfg
}
};
===================================================================
@@ -2623,6 +2623,7 @@ get_gimple_rhs_num_ops (enum tree_code c
|| (SYM) == ADDR_EXPR \
|| (SYM) == WITH_SIZE_EXPR \
|| (SYM) == SSA_NAME \
+ || (SYM) == VEC_SHUFFLE_EXPR \
|| (SYM) == VEC_COND_EXPR) ? GIMPLE_SINGLE_RHS \
: GIMPLE_INVALID_RHS),
#define END_OF_BASE_TREE_CODES (unsigned char) GIMPLE_INVALID_RHS,
===================================================================
@@ -1354,7 +1354,6 @@ init_optimization_passes (void)
NEXT_PASS (pass_vectorize);
{
struct opt_pass **p = &pass_vectorize.pass.sub;
- NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_dce_loop);
}
NEXT_PASS (pass_predcom);
@@ -1366,6 +1365,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_lim);
NEXT_PASS (pass_tree_loop_done);
}
+ NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_cse_reciprocals);
NEXT_PASS (pass_reassoc);
NEXT_PASS (pass_vrp);
===================================================================
@@ -127,6 +127,12 @@ (define_mode_attr sseinsnmode
(V8SF "V8SF") (V4DF "V4DF")
(V4SF "V4SF") (V2DF "V2DF")])
+;; Map a 128-bit vector mode to the integer vector mode of the same size,
+;; used as the mode of the shuffle mask for the vshuffle patterns
+(define_mode_attr sseshuffint
+ [(V16QI "V16QI") (V8HI "V8HI")
+ (V4SI "V4SI") (V2DI "V2DI")
+ (V4SF "V4SI") (V2DF "V2DI")])
+
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
[(V8SF "V8SI") (V4DF "V4DI")
@@ -5670,6 +5676,18 @@ (define_expand "vconduv2di"
DONE;
})
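+;; Shuffle the elements of vector operand 1 according to the index vector
+;; operand 2, storing the result in operand 0.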
+(define_expand "vshuffle<mode>"
+ [(match_operand:V_128 0 "register_operand" "")
+ (match_operand:V_128 1 "general_operand" "")
+ (match_operand:<sseshuffint> 2 "general_operand" "")]
+  "TARGET_SSSE3 || TARGET_AVX"
+{
+ bool ok = ix86_expand_vshuffle (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel bitwise logical operations
===================================================================
@@ -118,6 +118,7 @@ extern bool ix86_expand_int_movcc (rtx[]
extern bool ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
+extern bool ix86_expand_vshuffle (rtx[]);
extern void ix86_expand_sse_unpack (rtx[], bool, bool);
extern bool ix86_expand_int_addcc (rtx[]);
extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
===================================================================
@@ -18703,6 +18703,96 @@ ix86_expand_int_vcond (rtx operands[])
return true;
}
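+/* Expand a variable vector shuffle: select elements of operands[1]
+   according to the index vector operands[2] (each index is taken modulo
+   the number of vector elements) and store the result in operands[0].
+   The indexes are turned into a byte-level control vector for pshufb.  */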
+bool
+ix86_expand_vshuffle (rtx operands[])
+{
+ rtx target = operands[0];
+ rtx op0 = operands[1];
+ rtx mask = operands[2];
+ rtx mm, vt, cv0, t1;
+ enum machine_mode mode = GET_MODE (op0);
+ enum machine_mode maskmode = GET_MODE (mask);
+ enum machine_mode maskinner = GET_MODE_INNER (mode);
+ rtx vec[16];
+ int w, i, j;
+
+  gcc_assert ((TARGET_SSSE3 || TARGET_AVX) && GET_MODE_BITSIZE (mode) == 128);
+
+ op0 = force_reg (mode, op0);
+ mask = force_reg (maskmode, mask);
+
+ /* Number of elements in the vector. */
+ w = GET_MODE_BITSIZE (maskmode) / GET_MODE_BITSIZE (maskinner);
+
+ /* mask = mask & {w-1, w-1, w-1,...} */
+ for (i = 0; i < w; i++)
+ vec[i] = GEN_INT (w - 1);
+
+ mm = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
+ mm = force_reg (maskmode, mm);
+
+ mask = gen_rtx_AND (maskmode, mask, mm);
+
+ /* Convert mask to vector of chars. */
+ mask = simplify_gen_subreg (V16QImode, mask, maskmode, 0);
+ mask = force_reg (V16QImode, mask);
+
+  /* Build a helper mask which we will use in pshufb
+     (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
+     (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}
+     ... */
+ for (i = 0; i < w; i++)
+ for (j = 0; j < 16/w; j++)
+      vec[i*(16/w) + j] = GEN_INT (i*16/w);
+
+ vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
+ vt = force_reg (V16QImode, vt);
+
+ t1 = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ssse3_pshufbv16qi3 (t1, mask, vt));
+ mm = t1;
+
+ /* MM contains now something like
+ mm = {m[0], .., m[0], m[k], .., m[k], ... }, where
+ m[i] is an index of the element in the vector we are
+ selecting from.
+
+ Convert it into the byte positions by doing
+ mm = mm * {16/w, 16/w, ...}
+ mm = mm + {0,1,..,16/w, 0,1,..,16/w, ...} */
+ for (i = 0; i < 16; i++)
+ vec[i] = GEN_INT (16/w);
+
+ cv0 = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
+ cv0 = force_reg (V16QImode, cv0);
+ mm = gen_rtx_MULT (V16QImode, mm, cv0);
+
+ for (i = 0; i < w; i++)
+ for (j = 0; j < 16/w; j++)
+      vec[i*(16/w) + j] = GEN_INT (j);
+
+ cv0 = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
+ cv0 = force_reg (V16QImode, cv0);
+ mm = gen_rtx_PLUS (V16QImode, mm, cv0);
+ mm = force_reg (V16QImode, mm);
+
+ t1 = gen_reg_rtx (V16QImode);
+
+ /* Convert OP0 to vector of chars. */
+ op0 = simplify_gen_subreg (V16QImode, op0, mode, 0);
+ op0 = force_reg (V16QImode, op0);
+ emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mm));
+
+ /* Convert it back from vector of chars to the original mode. */
+ t1 = simplify_gen_subreg (mode, t1, V16QImode, 0);
+
+ emit_insn (gen_rtx_SET (VOIDmode, target, t1));
+
+ return true;
+}
+
/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
true if we should do zero extension, else sign extension. HIGH_P is
true if we want the N/2 high elements, else the low elements. */
@@ -30297,6 +30387,9 @@ struct expand_vec_perm_d
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
+static int extract_vec_perm_cst (struct expand_vec_perm_d *, tree);
+static bool ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask);
+
/* Get a vector mode of the same size as the original but with elements
twice as wide. This is only guaranteed to apply to integral vectors. */
@@ -33960,10 +34053,10 @@ ix86_vectorize_builtin_vec_perm_ok (tree
vec_mask = extract_vec_perm_cst (&d, mask);
- /* This hook is cannot be called in response to something that the
- user does (unlike the builtin expander) so we shouldn't ever see
- an error generated from the extract. */
- gcc_assert (vec_mask > 0 && vec_mask <= 3);
+ /* Check whether the mask can be applied to the vector type. */
+ if (vec_mask < 0 || vec_mask > 3)
+ return false;
+
one_vec = (vec_mask != 3);
/* Implementable with shufps or pshufd. */
===================================================================
@@ -943,6 +943,7 @@ get_expr_operands (gimple stmt, tree *ex
case COND_EXPR:
case VEC_COND_EXPR:
+ case VEC_SHUFFLE_EXPR:
get_expr_operands (stmt, &TREE_OPERAND (expr, 0), uflags);
get_expr_operands (stmt, &TREE_OPERAND (expr, 1), uflags);
get_expr_operands (stmt, &TREE_OPERAND (expr, 2), uflags);