===================================================================
@@ -6561,6 +6561,32 @@ invoke undefined behavior at runtime. W
accesses for vector subscription can be enabled with
@option{-Warray-bounds}.
+Vector shuffling is available using the functions
+@code{__builtin_shuffle (vec, mask)} and
+@code{__builtin_shuffle (vec0, vec1, mask)}. Both functions construct
+a permutation of elements from one or two vectors and return a vector
+of the same type as the input vector(s). The mask is a vector of
+integer-typed elements. The size of each element of the mask must be
+the same as the size of each input vector element. The number of
+elements in the input vector(s) and the mask must be the same.
+
+The elements of the input vectors are numbered from left to right across
+one or both of the vectors. Each mask element specifies the number of
+an element from the input vector(s). Consider the following example.
+
+@smallexample
+typedef int v4si __attribute__ ((vector_size (16)));
+
+v4si a = @{1,2,3,4@};
+v4si b = @{5,6,7,8@};
+v4si mask1 = @{0,1,1,3@};
+v4si mask2 = @{0,4,2,5@};
+v4si res;
+
+res = __builtin_shuffle (a, mask1); /* res is @{1,2,2,4@} */
+res = __builtin_shuffle (a, b, mask2); /* res is @{1,5,3,6@} */
+@end smallexample
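+
+Conceptually, each result element is selected by the corresponding mask
+element, as in the following scalar sketch of the above shuffles:
+
+@smallexample
+for (i = 0; i < 4; i++)   /* res = __builtin_shuffle (a, mask1);  */
+  res[i] = a[mask1[i]];
+
+for (i = 0; i < 4; i++)   /* res = __builtin_shuffle (a, b, mask2);  */
+  res[i] = mask2[i] < 4 ? a[mask2[i]] : b[mask2[i] - 4];
+@end smallexample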
+
You can declare variables and use them in function calls and returns, as
well as in assignments and some casts. You can specify a vector type as
a return type for a function. Vector types can also be used as function
===================================================================
@@ -2067,6 +2067,16 @@ dump_generic_node (pretty_printer *buffe
dump_generic_node (buffer, TREE_OPERAND (node, 2), spc, flags, false);
pp_string (buffer, " > ");
break;
+
+ case VEC_SHUFFLE_EXPR:
+ pp_string (buffer, " VEC_SHUFFLE_EXPR < ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (buffer, " , ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+ pp_string (buffer, " , ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 2), spc, flags, false);
+ pp_string (buffer, " > ");
+ break;
case DOT_PROD_EXPR:
pp_string (buffer, " DOT_PROD_EXPR < ");
===================================================================
@@ -425,6 +425,7 @@ const struct c_common_resword c_common_r
{ "__attribute__", RID_ATTRIBUTE, 0 },
{ "__builtin_choose_expr", RID_CHOOSE_EXPR, D_CONLY },
{ "__builtin_complex", RID_BUILTIN_COMPLEX, D_CONLY },
+ { "__builtin_shuffle", RID_BUILTIN_SHUFFLE, D_CONLY },
{ "__builtin_offsetof", RID_OFFSETOF, 0 },
{ "__builtin_types_compatible_p", RID_TYPES_COMPATIBLE_P, D_CONLY },
{ "__builtin_va_arg", RID_VA_ARG, 0 },
===================================================================
@@ -103,7 +103,7 @@ enum rid
/* C extensions */
RID_ASM, RID_TYPEOF, RID_ALIGNOF, RID_ATTRIBUTE, RID_VA_ARG,
RID_EXTENSION, RID_IMAGPART, RID_REALPART, RID_LABEL, RID_CHOOSE_EXPR,
- RID_TYPES_COMPATIBLE_P, RID_BUILTIN_COMPLEX,
+ RID_TYPES_COMPATIBLE_P, RID_BUILTIN_COMPLEX, RID_BUILTIN_SHUFFLE,
RID_DFLOAT32, RID_DFLOAT64, RID_DFLOAT128,
RID_FRACT, RID_ACCUM,
===================================================================
@@ -6620,6 +6620,78 @@ vector_compare_rtx (tree cond, bool unsi
return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value);
}
+/* Return true if VEC_SHUFFLE_EXPR can be expanded using SIMD extensions
+ of the CPU. */
+bool
+expand_vec_shuffle_expr_p (enum machine_mode mode, tree v0, tree v1, tree mask)
+{
+ int v0_mode_s = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (v0))));
+  int mask_mode_s
+    = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (mask))));
+
+ if (TREE_CODE (mask) == VECTOR_CST
+ && targetm.vectorize.builtin_vec_perm_ok (TREE_TYPE (v0), mask))
+ return true;
+
+ if (!operand_equal_p (v0, v1, 0) || v0_mode_s != mask_mode_s
+ || TYPE_VECTOR_SUBPARTS (TREE_TYPE (v0))
+ != TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)))
+ return false;
+
+ return direct_optab_handler (vshuffle_optab, mode) != CODE_FOR_nothing;
+}
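+
+/* Note: in expand_vec_shuffle_expr_p a constant mask defers to the
+   target's builtin_vec_perm_ok hook, so V0 and V1 may differ; a
+   variable mask is expandable only when V0 and V1 are the same
+   operand and the vshuffle optab is available for MODE.  */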
+
+/* Generate instructions for VEC_SHUFFLE_EXPR given its type and three
+   operands.  */
+rtx
+expand_vec_shuffle_expr (tree type, tree v0, tree v1, tree mask, rtx target)
+{
+ struct expand_operand ops[4];
+ enum insn_code icode;
+ enum machine_mode mode = TYPE_MODE (type);
+ rtx rtx_v0, rtx_mask;
+
+ gcc_assert (expand_vec_shuffle_expr_p (mode, v0, v1, mask));
+
+ if (TREE_CODE (mask) == VECTOR_CST)
+ {
+ tree m_type, call;
+ tree fn = targetm.vectorize.builtin_vec_perm (TREE_TYPE (v0), &m_type);
+
+ if (!fn)
+ goto vshuffle;
+
+ if (m_type != TREE_TYPE (TREE_TYPE (mask)))
+ {
+ int units = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask));
+ tree cvt = build_vector_type (m_type, units);
+ mask = fold_convert (cvt, mask);
+ }
+
+ call = fold_build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fn)), fn);
+ call = build_call_nary (type, call, 3, v0, v1, mask);
+
+ return expand_expr_real_1 (call, target, VOIDmode, EXPAND_NORMAL, NULL);
+ }
+
+vshuffle:
+ gcc_assert (operand_equal_p (v0, v1, 0));
+
+ icode = direct_optab_handler (vshuffle_optab, mode);
+
+ if (icode == CODE_FOR_nothing)
+ return 0;
+
+ rtx_v0 = expand_normal (v0);
+ rtx_mask = expand_normal (mask);
+
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], rtx_v0, mode);
+ create_input_operand (&ops[2], rtx_mask, mode);
+ expand_insn (icode, 3, ops);
+
+ return ops[0].value;
+}
+
/* Return insn code for TYPE, the type of a VEC_COND_EXPR. */
static inline enum insn_code
===================================================================
@@ -636,6 +636,9 @@ enum direct_optab_index
DOI_vcond,
DOI_vcondu,
+ /* Vector shuffling. */
+ DOI_vshuffle,
+
/* Block move operation. */
DOI_movmem,
@@ -701,6 +704,7 @@ typedef struct direct_optab_d *direct_op
#define reload_out_optab (&direct_optab_table[(int) DOI_reload_out])
#define vcond_optab (&direct_optab_table[(int) DOI_vcond])
#define vcondu_optab (&direct_optab_table[(int) DOI_vcondu])
+#define vshuffle_optab (&direct_optab_table[(int) DOI_vshuffle])
#define movmem_optab (&direct_optab_table[(int) DOI_movmem])
#define setmem_optab (&direct_optab_table[(int) DOI_setmem])
#define cmpstr_optab (&direct_optab_table[(int) DOI_cmpstr])
@@ -879,8 +883,15 @@ extern rtx expand_widening_mult (enum ma
/* Return tree if target supports vector operations for COND_EXPR. */
bool expand_vec_cond_expr_p (tree, enum machine_mode);
+/* Return true if target supports vector operations for VEC_SHUFFLE_EXPR.  */
+bool expand_vec_shuffle_expr_p (enum machine_mode, tree, tree, tree);
+
/* Generate code for VEC_COND_EXPR. */
extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
+
+/* Generate code for VEC_SHUFFLE_EXPR. */
+extern rtx expand_vec_shuffle_expr (tree, tree, tree, tree, rtx);
+
/* Generate code for VEC_LSHIFT_EXPR and VEC_RSHIFT_EXPR. */
extern rtx expand_vec_shift_expr (sepops, rtx);
===================================================================
@@ -255,6 +255,7 @@ static const char * const optabs[] =
"set_optab_handler (vec_realign_load_optab, $A, CODE_FOR_$(vec_realign_load_$a$))",
"set_direct_optab_handler (vcond_optab, $A, CODE_FOR_$(vcond$a$))",
"set_direct_optab_handler (vcondu_optab, $A, CODE_FOR_$(vcondu$a$))",
+ "set_direct_optab_handler (vshuffle_optab, $A, CODE_FOR_$(vshuffle$a$))",
"set_optab_handler (ssum_widen_optab, $A, CODE_FOR_$(widen_ssum$I$a3$))",
"set_optab_handler (usum_widen_optab, $A, CODE_FOR_$(widen_usum$I$a3$))",
"set_optab_handler (udot_prod_optab, $A, CODE_FOR_$(udot_prod$I$a$))",
===================================================================
@@ -0,0 +1,44 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define shuf2compare(type, count, vres, v0, v1, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != ((vidx(type, mask, __i) < count) ? \
+ vidx(type, v0, vidx(type, mask, __i)) : \
+ vidx(type, v1, (vidx(type, mask, __i) - count)))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {5, 5,5,5,5,5,argc,7};
+ vector (8, short) v1 = {argc, 1,8,8,4,9,argc,4};
+ vector (8, short) v2;
+
+ vector (8, short) mask0 = {0,2,3,1,4,5,6,7};
+ vector (8, short) mask1 = {0,12,3,4,3,0,10,9};
+
+ vector (8, short) mask2 = {0,8,1,9,2,10,3,11};
+
+ v2 = __builtin_shuffle (v0, v1, mask0);
+ shuf2compare (short, 8, v2, v0, v1, mask0);
+
+ v2 = __builtin_shuffle (v0, v1, mask1);
+ shuf2compare (short, 8, v2, v0, v1, mask1);
+
+ v2 = __builtin_shuffle (v0, v1, mask2);
+ shuf2compare (short, 8, v2, v0, v1, mask2);
+
+ v2 = __builtin_shuffle (mask0, mask0, v0);
+ shuf2compare (short, 8, v2, mask0, mask0, v0);
+
+ return 0;
+}
===================================================================
@@ -0,0 +1,50 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define shuf2compare(type, count, vres, v0, v1, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != ((vidx(type, mask, __i) < count) ? \
+ vidx(type, v0, vidx(type, mask, __i)) : \
+ vidx(type, v1, (vidx(type, mask, __i) - count)))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+vector (8, short) __attribute__ ((noinline))
+f (vector (8, short) x, vector (8, short) y, vector (8, short) mask) {
+ return __builtin_shuffle (x, y, mask);
+}
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {argc, 1,2,3,4,5,6,7};
+ vector (8, short) v1 = {argc, 1,argc,3,4,5,argc,7};
+ vector (8, short) v2;
+
+ vector (8, short) mask0 = {0,2,3,1,4,5,6,7};
+ vector (8, short) mask1 = {0,12,3,4,3,0,10,9};
+ vector (8, short) mask2 = {0,8,1,9,2,10,3,11};
+
+ v2 = f (v0, v1, mask0);
+ shuf2compare (short, 8, v2, v0, v1, mask0);
+
+ v2 = f (v0, v1, mask1);
+ shuf2compare (short, 8, v2, v0, v1, mask1);
+
+ v2 = f (v0, v1, mask2);
+ shuf2compare (short, 8, v2, v0, v1, mask2);
+
+ v2 = f (mask0, mask0, v0);
+ shuf2compare (short, 8, v2, mask0, mask0, v0);
+
+ return 0;
+}
===================================================================
@@ -0,0 +1,46 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define shufcompare(type, count, vres, v0, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != vidx(type, v0, vidx(type, mask, __i))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+int main (int argc, char *argv[]) {
+ vector (4, int) i0 = {argc, 1,2,3};
+ vector (4, int) i1 = {argc, 1, argc, 3};
+ vector (4, int) i2;
+
+ vector (4, int) imask = {0,3,2,1};
+
+ i2 = __builtin_shuffle (i0, i1);
+ shufcompare (int, 4, i2, i0, i1);
+
+ i2 = __builtin_shuffle (imask, i0);
+ shufcompare (int, 4, i2, imask, i0);
+
+ return 0;
+}
===================================================================
@@ -0,0 +1,36 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define shufcompare(type, count, vres, v0, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != vidx(type, v0, vidx(type, mask, __i))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+vector (8, short) __attribute__ ((noinline))
+f (vector (8, short) x, vector (8, short) mask) {
+ return __builtin_shuffle (x, mask);
+}
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {argc, 1,2,3,4,5,6,7};
+ vector (8, short) v1 = {argc, 1,argc,3,4,5,argc,7};
+ vector (8, short) v2;
+
+ vector (8, short) mask = {0,0,1,2,3,4,5,6};
+
+ v2 = f (v0, mask);
+ shufcompare (short, 8, v2, v0, mask);
+
+ v2 = f (v0, v1);
+ shufcompare (short, 8, v2, v0, v1);
+
+ return 0;
+}
===================================================================
@@ -0,0 +1,64 @@
+/* Test that different type variants are compatible within
+ vector shuffling. */
+
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define shufcompare(count, vres, v0, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vres[__i] != v0[mask[__i]]) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+#define test_compat_mask(res, vec, mask) \
+ res = __builtin_shuffle (vec, mask); \
+ shufcompare(4, res, vec, mask); \
+ res = __builtin_shuffle (vec, c ## mask); \
+ shufcompare(4, res, vec, c ## mask); \
+ res = __builtin_shuffle (vec, r ## mask); \
+ shufcompare(4, res, vec, r ## mask); \
+ res = __builtin_shuffle (vec, d ## mask); \
+ shufcompare(4, res, vec, d ## mask); \
+ res = __builtin_shuffle (vec, dc ## mask); \
+  shufcompare(4, res, vec, dc ## mask);
+
+#define test_compat_vec(res, vec, mask) \
+ test_compat_mask (res, vec, mask); \
+ test_compat_mask (res, c ## vec, mask); \
+ test_compat_mask (res, r ## vec, mask); \
+ test_compat_mask (res, d ## vec, mask); \
+ test_compat_mask (res, dc ## vec, mask);
+
+#define test_compat(res, vec, mask) \
+ test_compat_vec (res, vec, mask); \
+ test_compat_vec (d ## res, vec, mask); \
+ test_compat_vec (r ## res, vec, mask);
+
+typedef vector (4, int) v4si;
+typedef const vector (4, int) v4sicst;
+
+int main (int argc, char *argv[]) {
+ vector (4, int) vec = {argc, 1,2,3};
+ const vector (4, int) cvec = {argc, 1,2,3};
+ register vector (4, int) rvec = {argc, 1,2,3};
+ v4si dvec = {argc, 1,2,3};
+ v4sicst dcvec = {argc, 1,2,3};
+
+ vector (4, int) res;
+ v4si dres;
+ register vector (4, int) rres;
+
+ vector (4, int) mask = {0,3,2,1};
+ const vector (4, int) cmask = {0,3,2,1};
+ register vector (4, int) rmask = {0,3,2,1};
+ v4si dmask = {0,3,2,1};
+ v4sicst dcmask = {0,3,2,1};
+
+ test_compat (res, vec, mask);
+
+ return 0;
+}
===================================================================
@@ -19,8 +19,8 @@ _Complex float fc3 = __builtin_complex (
void
f (void)
{
- __builtin_complex (0.0); /* { dg-error "expected" } */
- __builtin_complex (0.0, 0.0, 0.0); /* { dg-error "expected" } */
+ __builtin_complex (0.0); /* { dg-error "wrong number of arguments" } */
+ __builtin_complex (0.0, 0.0, 0.0); /* { dg-error "wrong number of arguments" } */
}
-void (*p) (void) = __builtin_complex; /* { dg-error "expected" } */
+void (*p) (void) = __builtin_complex; /* { dg-error "cannot take address" } */
===================================================================
@@ -579,6 +579,7 @@ extern tree c_begin_omp_task (void);
extern tree c_finish_omp_task (location_t, tree, tree);
extern tree c_finish_omp_clauses (tree);
extern tree c_build_va_arg (location_t, tree, tree);
+extern tree c_build_vec_shuffle_expr (location_t, tree, tree, tree);
/* Set to 0 at beginning of a function definition, set to 1 if
a return statement that specifies a return value is seen. */
===================================================================
@@ -8605,6 +8605,10 @@ expand_expr_real_2 (sepops ops, rtx targ
case VEC_PACK_FIX_TRUNC_EXPR:
mode = TYPE_MODE (TREE_TYPE (treeop0));
goto binop;
+
+ case VEC_SHUFFLE_EXPR:
+      target = expand_vec_shuffle_expr (type, treeop0, treeop1,
+					treeop2, target);
+ return target;
case DOT_PROD_EXPR:
{
===================================================================
@@ -417,6 +417,16 @@ dump_ternary_rhs (pretty_printer *buffer
dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
pp_string (buffer, ">");
break;
+
+ case VEC_SHUFFLE_EXPR:
+ pp_string (buffer, "VEC_SHUFFLE_EXPR <");
+ dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
+ pp_string (buffer, ">");
+ break;
case REALIGN_LOAD_EXPR:
pp_string (buffer, "REALIGN_LOAD <");
===================================================================
@@ -2307,7 +2307,7 @@ build_array_ref (location_t loc, tree ar
if (TREE_CODE (TREE_TYPE (index)) != ARRAY_TYPE
&& TREE_CODE (TREE_TYPE (index)) != POINTER_TYPE)
{
- error_at (loc,
+ error_at (loc,
"subscripted value is neither array nor pointer nor vector");
return error_mark_node;
@@ -2339,8 +2339,8 @@ build_array_ref (location_t loc, tree ar
index = default_conversion (index);
gcc_assert (TREE_CODE (TREE_TYPE (index)) == INTEGER_TYPE);
-
- /* For vector[index], convert the vector to a
+
+ /* For vector[index], convert the vector to a
pointer of the underlying type. */
if (TREE_CODE (TREE_TYPE (array)) == VECTOR_TYPE)
{
@@ -2348,11 +2348,11 @@ build_array_ref (location_t loc, tree ar
tree type1;
if (TREE_CODE (index) == INTEGER_CST)
- if (!host_integerp (index, 1)
- || ((unsigned HOST_WIDE_INT) tree_low_cst (index, 1)
+ if (!host_integerp (index, 1)
+ || ((unsigned HOST_WIDE_INT) tree_low_cst (index, 1)
>= TYPE_VECTOR_SUBPARTS (TREE_TYPE (array))))
warning_at (loc, OPT_Warray_bounds, "index value is out of bound");
-
+
c_common_mark_addressable_vec (array);
type = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
type = build_pointer_type (type);
@@ -2845,6 +2845,99 @@ build_function_call_vec (location_t loc,
}
return require_complete_type (result);
}
+
+/* Build a VEC_SHUFFLE_EXPR if V0, V1 and MASK are not error_mark_nodes
+   and have vector types, V0 has the same type as V1, and the number of
+   elements of V0, V1 and MASK is the same.
+
+   If V1 is NULL_TREE it is assumed that __builtin_shuffle was called
+   with two arguments.  In that case the implementation passes the
+   first argument twice in order to share the same tree code.  As a
+   side effect, mask values may index up to twice the vector length;
+   this is an implementation accident and such semantics is not
+   guaranteed to the user.  */
+tree
+c_build_vec_shuffle_expr (location_t loc, tree v0, tree v1, tree mask)
+{
+ tree vec_shuffle, tmp;
+ bool wrap = true;
+ bool maybe_const = false;
+  bool two_arguments = false;
+
+ if (v1 == NULL_TREE)
+ {
+ two_arguments = true;
+ v1 = v0;
+ }
+
+ if (v0 == error_mark_node || v1 == error_mark_node
+ || mask == error_mark_node)
+ return error_mark_node;
+
+ if (TREE_CODE (TREE_TYPE (mask)) != VECTOR_TYPE
+ || TREE_CODE (TREE_TYPE (TREE_TYPE (mask))) != INTEGER_TYPE)
+ {
+      error_at (loc, "%<__builtin_shuffle%> last argument must "
+		     "be an integer vector");
+ return error_mark_node;
+ }
+
+ if (TREE_CODE (TREE_TYPE (v0)) != VECTOR_TYPE
+ || TREE_CODE (TREE_TYPE (v1)) != VECTOR_TYPE)
+ {
+      error_at (loc, "%<__builtin_shuffle%> arguments must be vectors");
+ return error_mark_node;
+ }
+
+ if (TYPE_MAIN_VARIANT (TREE_TYPE (v0)) != TYPE_MAIN_VARIANT (TREE_TYPE (v1)))
+ {
+      error_at (loc, "%<__builtin_shuffle%> argument vectors must be of "
+		     "the same type");
+ return error_mark_node;
+ }
+
+ if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (v0))
+ != TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask))
+ && TYPE_VECTOR_SUBPARTS (TREE_TYPE (v1))
+ != TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)))
+ {
+      error_at (loc, "%<__builtin_shuffle%> number of elements of the "
+		     "argument vector(s) and the mask vector must "
+		     "be the same");
+ return error_mark_node;
+ }
+
+ if (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (v0))))
+ != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (mask)))))
+ {
+      error_at (loc, "%<__builtin_shuffle%> argument vector(s) inner type "
+		     "must have the same size as inner type of the mask");
+ return error_mark_node;
+ }
+
+ /* Avoid C_MAYBE_CONST_EXPRs inside VEC_SHUFFLE_EXPR. */
+ tmp = c_fully_fold (v0, false, &maybe_const);
+ v0 = save_expr (tmp);
+ wrap &= maybe_const;
+
+ if (!two_arguments)
+ {
+ v1 = c_fully_fold (v1, false, &maybe_const);
+ wrap &= maybe_const;
+ }
+ else
+ v1 = v0;
+
+ mask = c_fully_fold (mask, false, &maybe_const);
+ wrap &= maybe_const;
+
+ vec_shuffle = build3 (VEC_SHUFFLE_EXPR, TREE_TYPE (v0), v0, v1, mask);
+
+ if (!wrap)
+ vec_shuffle = c_wrap_maybe_const (vec_shuffle, true);
+
+ return vec_shuffle;
+}
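+
+/* For example (a sketch of what the checks above accept and reject):
+
+     typedef int v4si __attribute__ ((vector_size (16)));
+     typedef short v8hi __attribute__ ((vector_size (16)));
+     v4si a, b, m;
+     v8hi s;
+
+     __builtin_shuffle (a, m);      // ok, two-argument form
+     __builtin_shuffle (a, b, m);   // ok, three-argument form
+     __builtin_shuffle (a, s);      // error, element numbers differ
+     __builtin_shuffle (a, b, 1);   // error, mask is not an integer vector
+*/
+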
/* Convert the argument expressions in the vector VALUES
to the types in the list TYPELIST.
@@ -3167,7 +3260,7 @@ convert_arguments (tree typelist, VEC(tr
if (typetail != 0 && TREE_VALUE (typetail) != void_type_node)
{
- error_at (input_location,
+ error_at (input_location,
"too few arguments to function %qE", function);
if (fundecl && !DECL_BUILT_IN (fundecl))
inform (DECL_SOURCE_LOCATION (fundecl), "declared here");
@@ -3566,7 +3659,7 @@ build_unary_op (location_t location,
/* Complain about anything that is not a true lvalue. In
Objective-C, skip this check for property_refs. */
- if (!objc_is_property_ref (arg)
+ if (!objc_is_property_ref (arg)
&& !lvalue_or_else (location,
arg, ((code == PREINCREMENT_EXPR
|| code == POSTINCREMENT_EXPR)
@@ -3683,7 +3776,7 @@ build_unary_op (location_t location,
need to ask Objective-C to build the increment or decrement
expression for it. */
if (objc_is_property_ref (arg))
- return objc_build_incr_expr_for_property_ref (location, code,
+ return objc_build_incr_expr_for_property_ref (location, code,
arg, inc);
/* Report a read-only lvalue. */
@@ -5926,7 +6019,7 @@ void
pedwarn_init (location_t location, int opt, const char *gmsgid)
{
char *ofwhat;
-
+
/* The gmsgid may be a format string with %< and %>. */
pedwarn (location, opt, gmsgid);
ofwhat = print_spelling ((char *) alloca (spelling_length () + 1));
@@ -9344,8 +9437,8 @@ scalar_to_vector (location_t loc, enum t
tree type1 = TREE_TYPE (op1);
bool integer_only_op = false;
enum stv_conv ret = stv_firstarg;
-
- gcc_assert (TREE_CODE (type0) == VECTOR_TYPE
+
+ gcc_assert (TREE_CODE (type0) == VECTOR_TYPE
|| TREE_CODE (type1) == VECTOR_TYPE);
switch (code)
{
@@ -9370,7 +9463,7 @@ scalar_to_vector (location_t loc, enum t
case BIT_AND_EXPR:
integer_only_op = true;
/* ... fall through ... */
-
+
case PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
@@ -9387,7 +9480,7 @@ scalar_to_vector (location_t loc, enum t
}
if (TREE_CODE (type0) == INTEGER_TYPE
- && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE)
+ && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE)
{
if (unsafe_conversion_p (TREE_TYPE (type1), op0, false))
{
@@ -9399,7 +9492,7 @@ scalar_to_vector (location_t loc, enum t
}
else if (!integer_only_op
/* Allow integer --> real conversion if safe. */
- && (TREE_CODE (type0) == REAL_TYPE
+ && (TREE_CODE (type0) == REAL_TYPE
|| TREE_CODE (type0) == INTEGER_TYPE)
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (type1)))
{
@@ -9414,7 +9507,7 @@ scalar_to_vector (location_t loc, enum t
default:
break;
}
-
+
return stv_nothing;
}
@@ -9529,8 +9622,8 @@ build_binary_op (location_t location, en
int_const = int_const_or_overflow = false;
/* Do not apply default conversion in mixed vector/scalar expression. */
- if (convert_p
- && !((TREE_CODE (TREE_TYPE (op0)) == VECTOR_TYPE)
+ if (convert_p
+ && !((TREE_CODE (TREE_TYPE (op0)) == VECTOR_TYPE)
!= (TREE_CODE (TREE_TYPE (op1)) == VECTOR_TYPE)))
{
op0 = default_conversion (op0);
@@ -9608,7 +9701,7 @@ build_binary_op (location_t location, en
if ((code0 == VECTOR_TYPE) != (code1 == VECTOR_TYPE))
{
enum stv_conv convert_flag = scalar_to_vector (location, code, op0, op1);
-
+
switch (convert_flag)
{
case stv_error:
@@ -9949,7 +10042,7 @@ build_binary_op (location_t location, en
{
if (code == EQ_EXPR)
warning_at (location,
- OPT_Waddress,
+ OPT_Waddress,
"the comparison will always evaluate as %<false%> "
"for the address of %qD will never be NULL",
TREE_OPERAND (op1, 0));
===================================================================
@@ -7286,6 +7286,7 @@ gimplify_expr (tree *expr_p, gimple_seq
}
case FMA_EXPR:
+ case VEC_SHUFFLE_EXPR:
/* Classified as tcc_expression. */
goto expr_3;
===================================================================
@@ -497,6 +497,19 @@ DEFTREECODE (COND_EXPR, "cond_expr", tcc
*/
DEFTREECODE (VEC_COND_EXPR, "vec_cond_expr", tcc_expression, 3)
+/* Vector shuffle expression.  A = VEC_SHUFFLE_EXPR<v0, v1, mask>
+   means
+
+   foreach i in 0 .. length (mask) - 1:
+     A[i] = mask[i] < length (v0) ? v0[mask[i]] : v1[mask[i] - length (v0)]
+
+   V0 and V1 are vectors of the same type.  MASK is an integer-typed
+   vector.  The number of MASK elements must be the same as the
+   number of elements in V0 and V1.  The size of the inner type
+   of MASK and of V0 and V1 must be the same.
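+
+   For example, with v0 = {1,2,3,4}, v1 = {5,6,7,8} and
+   mask = {0,5,2,7}, the result is {1,6,3,8}.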
+*/
+DEFTREECODE (VEC_SHUFFLE_EXPR, "vec_shuffle_expr", tcc_expression, 3)
+
/* Declare local variables, including making RTL and allocating space.
BIND_EXPR_VARS is a chain of VAR_DECL nodes for the variables.
BIND_EXPR_BODY is the body, the expression to be computed using
===================================================================
@@ -3285,6 +3285,7 @@ estimate_operator_cost (enum tree_code c
??? We may consider mapping RTL costs to this. */
case COND_EXPR:
case VEC_COND_EXPR:
+ case VEC_SHUFFLE_EXPR:
case PLUS_EXPR:
case POINTER_PLUS_EXPR:
===================================================================
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
+#include "diagnostic.h"
/* Need to include rtl.h, expr.h, etc. for optabs. */
#include "expr.h"
@@ -326,10 +327,10 @@ uniform_vector_p (tree vec)
}
if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
return NULL_TREE;
-
+
return first;
}
-
+
return NULL_TREE;
}
@@ -432,6 +433,263 @@ type_for_widest_vector_mode (enum machin
}
}
+
+/* Build a reference to the element of the vector VECT.  Function
+   returns either the element itself, a BIT_FIELD_REF, or an
+   ARRAY_REF expression.
+
+   GSI is required to insert temporary variables while building a
+   reference to the element of the vector VECT.
+
+   PTMPVEC is a pointer to the temporary variable for caching
+   purposes.  If PTMPVEC is NULL, a new temporary variable will be
+   created.  */
+static tree
+vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
+{
+ tree type;
+ gimple asgn;
+ tree tmpvec;
+ tree arraytype;
+ bool need_asgn = true;
+
+ gcc_assert (TREE_CODE (TREE_TYPE (vect)) == VECTOR_TYPE);
+
+ type = TREE_TYPE (vect);
+ if (TREE_CODE (idx) == INTEGER_CST)
+ {
+ unsigned HOST_WIDE_INT index;
+
+ if (!host_integerp (idx, 1)
+	  || (index = tree_low_cst (idx, 1)) >= TYPE_VECTOR_SUBPARTS (type))
+ return error_mark_node;
+
+ if (TREE_CODE (vect) == VECTOR_CST)
+ {
+ unsigned i;
+ tree vals = TREE_VECTOR_CST_ELTS (vect);
+ for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
+ if (i == index)
+ return TREE_VALUE (vals);
+ return error_mark_node;
+ }
+ else if (TREE_CODE (vect) == CONSTRUCTOR)
+ {
+ unsigned i;
+ VEC (constructor_elt, gc) *vals = CONSTRUCTOR_ELTS (vect);
+ constructor_elt *elt;
+
+ for (i = 0; VEC_iterate (constructor_elt, vals, i, elt); i++)
+ if (operand_equal_p (elt->index, idx, 0))
+ return elt->value;
+ return fold_convert (TREE_TYPE (type), integer_zero_node);
+ }
+ else if (TREE_CODE (vect) == SSA_NAME)
+ {
+ tree size = TYPE_SIZE (TREE_TYPE (type));
+ tree pos = fold_build2 (MULT_EXPR, TREE_TYPE (idx), idx, size);
+ return fold_build3 (BIT_FIELD_REF, TREE_TYPE (type), vect, size, pos);
+ }
+ else
+ return error_mark_node;
+ }
+
+ if (!ptmpvec)
+ tmpvec = create_tmp_var (TREE_TYPE (vect), "vectmp");
+ else if (!*ptmpvec)
+ tmpvec = *ptmpvec = create_tmp_var (TREE_TYPE (vect), "vectmp");
+ else
+ {
+ tmpvec = *ptmpvec;
+ need_asgn = false;
+ }
+
+ if (need_asgn)
+ {
+ TREE_ADDRESSABLE (tmpvec) = 1;
+ asgn = gimple_build_assign (tmpvec, vect);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+ }
+
+ arraytype = build_array_type_nelts (TREE_TYPE (type),
+ TYPE_VECTOR_SUBPARTS (TREE_TYPE (vect)));
+
+ return build4 (ARRAY_REF, TREE_TYPE (type),
+ build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
+ idx, NULL_TREE, NULL_TREE);
+}
+
+/* Check if VEC_SHUFFLE_EXPR within the given setting is supported
+   by hardware, or lower it piecewise.  Function returns false when
+   the expression must be replaced with a call to BUILT_IN_TRAP,
+   true otherwise.
+
+   When VEC_SHUFFLE_EXPR has the same first and second operands,
+   VEC_SHUFFLE_EXPR <v0, v0, mask> is lowered to
+   {v0[mask[0]], v0[mask[1]], ...}
+   MASK and V0 must have the same number of elements.
+
+   Otherwise VEC_SHUFFLE_EXPR <v0, v1, mask> is lowered to
+   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0] - len(v0)], ...}
+   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
+   same number of elements.  */
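+
+/* For example (a sketch): lowering VEC_SHUFFLE_EXPR <v0, v0, {3,2,1,0}>
+   for a four-element vector produces the constructor
+   {v0[3], v0[2], v0[1], v0[0]}.  */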
+static bool
+lower_vec_shuffle (gimple_stmt_iterator *gsi, location_t loc)
+{
+
+ tree mask = gimple_assign_rhs3 (stmt);
+ tree vec0 = gimple_assign_rhs1 (stmt);
+ tree vec1 = gimple_assign_rhs2 (stmt);
+ unsigned els = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask));
+ tree type0 = TREE_TYPE (TREE_TYPE (vec0));
+ VEC(constructor_elt,gc) *v = NULL;
+ tree vectype, constr;
+ tree vec0tmp = NULL_TREE, masktmp = NULL_TREE;
+
+  if (expand_vec_shuffle_expr_p (TYPE_MODE (TREE_TYPE (vec0)), vec0,
+				 vec1, mask))
+ {
+ tree t;
+
+ t = gimplify_build3 (gsi, VEC_SHUFFLE_EXPR, TREE_TYPE (vec0),
+ vec0, vec1, mask);
+ gimple_assign_set_rhs_from_tree (gsi, t);
+ /* Statement should be updated by callee. */
+ return true;
+ }
+
+ if (operand_equal_p (vec0, vec1, 0))
+ {
+      unsigned i;
+
+ v = VEC_alloc (constructor_elt, gc, els);
+ for (i = 0; i < els; i++)
+ {
+ tree idxval, vecel, t;
+
+ idxval = vector_element (gsi, mask, size_int (i), &masktmp);
+ if (idxval == error_mark_node)
+ {
+	      if (warning_at (loc, 0, "invalid shuffling mask index %i", i))
+		inform (loc, "if this code is reached the program will abort");
+ return false;
+ }
+
+ vecel = vector_element (gsi, vec0, idxval, &vec0tmp);
+ if (vecel == error_mark_node)
+ {
+	      if (warning_at (loc, 0, "invalid shuffling arguments"))
+		inform (loc, "if this code is reached the program will abort");
+ return false;
+ }
+
+ t = force_gimple_operand_gsi (gsi, vecel, true,
+ NULL_TREE, true, GSI_SAME_STMT);
+ CONSTRUCTOR_APPEND_ELT (v, size_int (i), t);
+ }
+ }
+ else
+ {
+ unsigned i;
+ tree var = create_tmp_var (type0, "vecel");
+ tree vec1tmp = NULL_TREE;
+
+ v = VEC_alloc (constructor_elt, gc, els);
+ for (i = 0; i < els; i++)
+ {
+ tree idxval, idx1val, cond, elval0, elval1, condexpr, t, ssatmp;
+ tree vec0el, vec1el;
+ gimple asgn;
+
+ idxval = vector_element (gsi, mask, size_int (i), &masktmp);
+ if (idxval == error_mark_node)
+ {
+	      if (warning_at (loc, 0, "invalid shuffling mask index %i", i))
+		inform (loc, "if this code is reached the program will abort");
+ return false;
+ }
+
+ if (TREE_CODE (idxval) == INTEGER_CST)
+ {
+ if (tree_int_cst_lt (idxval, size_int (els)))
+ {
+ vec0el = vector_element (gsi, vec0, idxval, &vec0tmp);
+ t = force_gimple_operand_gsi (gsi, vec0el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ }
+ else if (tree_int_cst_lt (idxval, size_int (2*els)))
+ {
+ idx1val = fold_build2 (MINUS_EXPR, TREE_TYPE (idxval),
+ idxval, build_int_cst (TREE_TYPE (idxval), els));
+
+ vec1el = vector_element (gsi, vec1, idx1val, &vec1tmp);
+ t = force_gimple_operand_gsi (gsi, vec1el, true,
+ NULL_TREE, true, GSI_SAME_STMT);
+ }
+ else
+ {
+		  if (warning_at (loc, 0, "invalid shuffling mask index %i", i))
+		    inform (loc, "if this code is reached the "
+				 "program will abort");
+ return false;
+ }
+ }
+ else
+ {
+ idx1val = fold_build2 (MINUS_EXPR, TREE_TYPE (idxval),
+ idxval, build_int_cst (TREE_TYPE (idxval), els));
+ idx1val = force_gimple_operand_gsi (gsi, idx1val,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+	      cond = fold_build2 (GT_EXPR, boolean_type_node, idxval,
+				  fold_convert (TREE_TYPE (idxval),
+						size_int (els - 1)));
+
+ vec0el = vector_element (gsi, vec0, idxval, &vec0tmp);
+ if (vec0el == error_mark_node)
+ {
+		  if (warning_at (loc, 0, "invalid shuffling arguments"))
+		    inform (loc, "if this code is reached the "
+				 "program will abort");
+ return false;
+ }
+
+ elval0 = force_gimple_operand_gsi (gsi, vec0el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+
+ vec1el = vector_element (gsi, vec1, idx1val, &vec1tmp);
+ if (vec1el == error_mark_node)
+ {
+		  if (warning_at (loc, 0, "invalid shuffling arguments"))
+		    inform (loc, "if this code is reached the "
+				 "program will abort");
+ return false;
+ }
+
+ elval1 = force_gimple_operand_gsi (gsi, vec1el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+
+	      condexpr = fold_build3 (COND_EXPR, type0, cond,
+				      elval1, elval0);
+
+	      t = force_gimple_operand_gsi (gsi, condexpr, true,
+					    NULL_TREE, true, GSI_SAME_STMT);
+ }
+
+ asgn = gimple_build_assign (var, t);
+ ssatmp = make_ssa_name (var, asgn);
+ gimple_assign_set_lhs (asgn, ssatmp);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+ CONSTRUCTOR_APPEND_ELT (v, size_int (i), ssatmp);
+ }
+ }
+
+ vectype = build_vector_type (type0, els);
+ constr = build_constructor (vectype, v);
+ gimple_assign_set_rhs_from_tree (gsi, constr);
+ /* Statement should be updated by callee. */
+ return true;
+}
+
/* Process one statement. If we identify a vector operation, expand it. */
static void
@@ -451,6 +709,25 @@ expand_vector_operations_1 (gimple_stmt_
code = gimple_assign_rhs_code (stmt);
rhs_class = get_gimple_rhs_class (code);
+ if (code == VEC_SHUFFLE_EXPR)
+ {
+ if (!lower_vec_shuffle (gsi, gimple_location (stmt)))
+ {
+ gimple new_stmt;
+ tree vec0;
+
+ vec0 = gimple_assign_rhs1 (stmt);
+ new_stmt = gimple_build_call (built_in_decls[BUILT_IN_TRAP], 0);
+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+ split_block (gimple_bb (new_stmt), new_stmt);
+ new_stmt = gimple_build_assign (gimple_assign_lhs (stmt), vec0);
+ gsi_replace (gsi, new_stmt, false);
+ }
+
+ gimple_set_modified (gsi_stmt (*gsi), true);
+ update_stmt (gsi_stmt (*gsi));
+ }
+
if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
return;
@@ -485,9 +762,9 @@ expand_vector_operations_1 (gimple_stmt_
{
bool vector_scalar_shift;
op = optab_for_tree_code (code, type, optab_scalar);
-
+
/* Vector/Scalar shift is supported. */
- vector_scalar_shift = (op && (optab_handler (op, TYPE_MODE (type))
+ vector_scalar_shift = (op && (optab_handler (op, TYPE_MODE (type))
!= CODE_FOR_nothing));
/* If the 2nd argument is vector, we need a vector/vector shift.
@@ -500,10 +777,10 @@ expand_vector_operations_1 (gimple_stmt_
/* Check whether we have vector <op> {x,x,x,x} where x
could be a scalar variable or a constant. Transform
vector <op> {x,x,x,x} ==> vector <op> scalar. */
- if (vector_scalar_shift
+ if (vector_scalar_shift
&& ((TREE_CODE (rhs2) == VECTOR_CST
&& (first = uniform_vector_p (rhs2)) != NULL_TREE)
- || (TREE_CODE (rhs2) == SSA_NAME
+ || (TREE_CODE (rhs2) == SSA_NAME
&& (def_stmt = SSA_NAME_DEF_STMT (rhs2))
&& gimple_assign_single_p (def_stmt)
&& (first = uniform_vector_p
@@ -516,14 +793,14 @@ expand_vector_operations_1 (gimple_stmt_
else
op = optab_for_tree_code (code, type, optab_vector);
}
-
+
/* Try for a vector/scalar shift, and if we don't have one, see if we
have a vector/vector shift */
else if (!vector_scalar_shift)
{
op = optab_for_tree_code (code, type, optab_vector);
- if (op && (optab_handler (op, TYPE_MODE (type))
+ if (op && (optab_handler (op, TYPE_MODE (type))
!= CODE_FOR_nothing))
{
/* Transform vector <op> scalar => vector <op> {x,x,x,x}. */
@@ -613,9 +890,9 @@ expand_vector_operations_1 (gimple_stmt_
if it may need the bit-twiddling tricks implemented in this file. */
static bool
-gate_expand_vector_operations (void)
+gate_expand_vector_operations_ssa (void)
{
- return flag_tree_vectorize != 0;
+ return optimize == 0;
}
static unsigned int
@@ -648,7 +925,7 @@ struct gimple_opt_pass pass_lower_vector
{
GIMPLE_PASS,
"veclower", /* name */
- 0, /* gate */
+ gate_expand_vector_operations_ssa, /* gate */
expand_vector_operations, /* execute */
NULL, /* sub */
NULL, /* next */
@@ -661,6 +938,7 @@ struct gimple_opt_pass pass_lower_vector
TODO_update_ssa /* todo_flags_finish */
| TODO_verify_ssa
| TODO_verify_stmts | TODO_verify_flow
+ | TODO_cleanup_cfg
}
};
@@ -669,7 +947,7 @@ struct gimple_opt_pass pass_lower_vector
{
GIMPLE_PASS,
"veclower2", /* name */
- gate_expand_vector_operations, /* gate */
+ 0, /* gate */
expand_vector_operations, /* execute */
NULL, /* sub */
NULL, /* next */
@@ -682,6 +960,7 @@ struct gimple_opt_pass pass_lower_vector
TODO_update_ssa /* todo_flags_finish */
| TODO_verify_ssa
| TODO_verify_stmts | TODO_verify_flow
+ | TODO_cleanup_cfg
}
};
===================================================================
@@ -3178,7 +3178,7 @@ tree-vect-generic.o : tree-vect-generic.
$(TM_H) $(TREE_FLOW_H) $(GIMPLE_H) tree-iterator.h $(TREE_PASS_H) \
$(FLAGS_H) $(OPTABS_H) $(MACHMODE_H) $(EXPR_H) \
langhooks.h $(FLAGS_H) $(DIAGNOSTIC_H) gt-tree-vect-generic.h $(GGC_H) \
- coretypes.h insn-codes.h
+ coretypes.h insn-codes.h $(DIAGNOSTIC_H)
df-core.o : df-core.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
===================================================================
@@ -2615,6 +2615,7 @@ get_gimple_rhs_num_ops (enum tree_code c
|| (SYM) == WIDEN_MULT_MINUS_EXPR \
|| (SYM) == DOT_PROD_EXPR \
|| (SYM) == REALIGN_LOAD_EXPR \
+ || (SYM) == VEC_SHUFFLE_EXPR \
|| (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \
: ((SYM) == COND_EXPR \
|| (SYM) == CONSTRUCTOR \
===================================================================
@@ -3711,6 +3711,59 @@ verify_gimple_assign_ternary (gimple stm
}
break;
+ case VEC_SHUFFLE_EXPR:
+ if (!useless_type_conversion_p (lhs_type, rhs1_type)
+ || !useless_type_conversion_p (lhs_type, rhs2_type))
+ {
+ error ("type mismatch in vector shuffle expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+
+ if (TREE_CODE (rhs1_type) != VECTOR_TYPE
+ || TREE_CODE (rhs2_type) != VECTOR_TYPE
+ || TREE_CODE (rhs3_type) != VECTOR_TYPE)
+ {
+ error ("vector types expected in vector shuffle expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+
+ if (TYPE_VECTOR_SUBPARTS (rhs1_type) != TYPE_VECTOR_SUBPARTS (rhs2_type)
+ || TYPE_VECTOR_SUBPARTS (rhs2_type)
+ != TYPE_VECTOR_SUBPARTS (rhs3_type)
+ || TYPE_VECTOR_SUBPARTS (rhs3_type)
+ != TYPE_VECTOR_SUBPARTS (lhs_type))
+ {
+ error ("vectors with different element number found "
+ "in vector shuffle expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+
+ if (TREE_CODE (TREE_TYPE (rhs3_type)) != INTEGER_TYPE
+ || GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs3_type)))
+ != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1_type))))
+ {
+ error ("invalid mask type in vector shuffle expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+
+ return false;
+
case DOT_PROD_EXPR:
case REALIGN_LOAD_EXPR:
/* FIXME. */
===================================================================
@@ -1354,7 +1354,6 @@ init_optimization_passes (void)
NEXT_PASS (pass_vectorize);
{
struct opt_pass **p = &pass_vectorize.pass.sub;
- NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_dce_loop);
}
NEXT_PASS (pass_predcom);
@@ -1366,6 +1365,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_lim);
NEXT_PASS (pass_tree_loop_done);
}
+ NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_cse_reciprocals);
NEXT_PASS (pass_reassoc);
NEXT_PASS (pass_vrp);
===================================================================
@@ -5989,6 +5989,46 @@ c_parser_alignof_expression (c_parser *p
}
}
+/* Helper function to read arguments of builtins which are interfaces
+   for the middle-end nodes like COMPLEX_EXPR, VEC_SHUFFLE_EXPR and
+   others.  The name of the builtin is passed using the BNAME parameter.
+   Function returns true if there were no errors while parsing and
+   stores the read arguments in EXPR_LIST.  The list of original types
+   can be obtained by passing a non-NULL value for ORIG_TYPES.  */
+static bool
+c_parser_get_builtin_args (c_parser *parser, const char *bname,
+ VEC(tree,gc) **expr_list,
+ VEC(tree,gc) **orig_types)
+{
+ location_t loc = c_parser_peek_token (parser)->location;
+ *expr_list = NULL;
+
+ if (c_parser_next_token_is_not (parser, CPP_OPEN_PAREN))
+ {
+ error_at (loc, "cannot take address of %qs", bname);
+ return false;
+ }
+
+ c_parser_consume_token (parser);
+
+ if (c_parser_next_token_is (parser, CPP_CLOSE_PAREN))
+ {
+ c_parser_consume_token (parser);
+ return true;
+ }
+
+ if (orig_types)
+ *expr_list = c_parser_expr_list (parser, false, false, orig_types);
+ else
+ *expr_list = c_parser_expr_list (parser, false, false, NULL);
+
+ if (!c_parser_require (parser, CPP_CLOSE_PAREN, "expected %<)%>"))
+ return false;
+
+ return true;
+}
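+
+/* For example, the RID_BUILTIN_COMPLEX case below calls
+   c_parser_get_builtin_args (parser, "__builtin_complex", &expr_list, NULL)
+   and then checks VEC_length (tree, expr_list) before using the
+   collected arguments.  */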
+
/* Parse a postfix expression (C90 6.3.1-6.3.2, C99 6.5.1-6.5.2).
postfix-expression:
@@ -6027,6 +6067,10 @@ c_parser_alignof_expression (c_parser *p
assignment-expression )
__builtin_types_compatible_p ( type-name , type-name )
__builtin_complex ( assignment-expression , assignment-expression )
+ __builtin_shuffle ( assignment-expression , assignment-expression )
+ __builtin_shuffle ( assignment-expression ,
+ assignment-expression ,
+			 assignment-expression )
offsetof-member-designator:
identifier
@@ -6047,7 +6091,7 @@ c_parser_alignof_expression (c_parser *p
static struct c_expr
c_parser_postfix_expression (c_parser *parser)
{
- struct c_expr expr, e1, e2, e3;
+ struct c_expr expr, e1;
struct c_type_name *t1, *t2;
  location_t loc = c_parser_peek_token (parser)->location;
expr.original_code = ERROR_MARK;
@@ -6333,45 +6377,55 @@ c_parser_postfix_expression (c_parser *p
}
break;
case RID_CHOOSE_EXPR:
- c_parser_consume_token (parser);
- if (!c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>"))
- {
- expr.value = error_mark_node;
- break;
- }
- loc = c_parser_peek_token (parser)->location;
- e1 = c_parser_expr_no_commas (parser, NULL);
- if (!c_parser_require (parser, CPP_COMMA, "expected %<,%>"))
- {
- c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
- expr.value = error_mark_node;
- break;
- }
- e2 = c_parser_expr_no_commas (parser, NULL);
- if (!c_parser_require (parser, CPP_COMMA, "expected %<,%>"))
- {
- c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
- expr.value = error_mark_node;
- break;
- }
- e3 = c_parser_expr_no_commas (parser, NULL);
- c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
- "expected %<)%>");
{
- tree c;
+ VEC(tree,gc) *expr_list;
+ VEC(tree,gc) *orig_types;
+ tree e1value, e2value, e3value, c;
- c = e1.value;
- mark_exp_read (e2.value);
- mark_exp_read (e3.value);
+ c_parser_consume_token (parser);
+ if (!c_parser_get_builtin_args (parser,
+ "__builtin_choose_expr",
+ &expr_list, &orig_types))
+ {
+ expr.value = error_mark_node;
+ break;
+ }
+
+ if (VEC_length (tree, expr_list) != 3)
+ {
+ error_at (loc, "wrong number of arguments to "
+ "%<__builtin_choose_expr%>");
+ expr.value = error_mark_node;
+ break;
+ }
+
+ e1value = VEC_index (tree, expr_list, 0);
+ e2value = VEC_index (tree, expr_list, 1);
+ e3value = VEC_index (tree, expr_list, 2);
+
+ c = e1value;
+ mark_exp_read (e2value);
+ mark_exp_read (e3value);
if (TREE_CODE (c) != INTEGER_CST
|| !INTEGRAL_TYPE_P (TREE_TYPE (c)))
error_at (loc,
"first argument to %<__builtin_choose_expr%> not"
" a constant");
constant_expression_warning (c);
- expr = integer_zerop (c) ? e3 : e2;
+
+ if (integer_zerop (c))
+ {
+ expr.value = e3value;
+ expr.original_type = VEC_index (tree, orig_types, 2);
+ }
+ else
+ {
+ expr.value = e2value;
+ expr.original_type = VEC_index (tree, orig_types, 1);
+ }
+
+ break;
}
- break;
case RID_TYPES_COMPATIBLE_P:
c_parser_consume_token (parser);
if (!c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>"))
@@ -6410,57 +6464,96 @@ c_parser_postfix_expression (c_parser *p
}
break;
case RID_BUILTIN_COMPLEX:
- c_parser_consume_token (parser);
- if (!c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>"))
- {
- expr.value = error_mark_node;
- break;
- }
- loc = c_parser_peek_token (parser)->location;
- e1 = c_parser_expr_no_commas (parser, NULL);
- if (!c_parser_require (parser, CPP_COMMA, "expected %<,%>"))
- {
- c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
- expr.value = error_mark_node;
- break;
- }
- e2 = c_parser_expr_no_commas (parser, NULL);
- c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
- "expected %<)%>");
- mark_exp_read (e1.value);
- if (TREE_CODE (e1.value) == EXCESS_PRECISION_EXPR)
- e1.value = convert (TREE_TYPE (e1.value),
- TREE_OPERAND (e1.value, 0));
- mark_exp_read (e2.value);
- if (TREE_CODE (e2.value) == EXCESS_PRECISION_EXPR)
- e2.value = convert (TREE_TYPE (e2.value),
- TREE_OPERAND (e2.value, 0));
- if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (e1.value))
- || DECIMAL_FLOAT_TYPE_P (TREE_TYPE (e1.value))
- || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (e2.value))
- || DECIMAL_FLOAT_TYPE_P (TREE_TYPE (e2.value)))
- {
- error_at (loc, "%<__builtin_complex%> operand "
- "not of real binary floating-point type");
- expr.value = error_mark_node;
- break;
- }
- if (TYPE_MAIN_VARIANT (TREE_TYPE (e1.value))
- != TYPE_MAIN_VARIANT (TREE_TYPE (e2.value)))
- {
- error_at (loc,
- "%<__builtin_complex%> operands of different types");
- expr.value = error_mark_node;
- break;
- }
- if (!flag_isoc99)
- pedwarn (loc, OPT_pedantic,
- "ISO C90 does not support complex types");
- expr.value = build2 (COMPLEX_EXPR,
- build_complex_type (TYPE_MAIN_VARIANT
- (TREE_TYPE (e1.value))),
- e1.value, e2.value);
- break;
+ {
+ VEC(tree,gc) *expr_list;
+ tree e1value, e2value;
+
+ c_parser_consume_token (parser);
+ if (!c_parser_get_builtin_args (parser,
+ "__builtin_complex",
+ &expr_list, NULL))
+ {
+ expr.value = error_mark_node;
+ break;
+ }
+
+ if (VEC_length (tree, expr_list) != 2)
+ {
+ error_at (loc, "wrong number of arguments to "
+ "%<__builtin_complex%>");
+ expr.value = error_mark_node;
+ break;
+ }
+
+ e1value = VEC_index (tree, expr_list, 0);
+ e2value = VEC_index (tree, expr_list, 1);
+
+ mark_exp_read (e1value);
+ if (TREE_CODE (e1value) == EXCESS_PRECISION_EXPR)
+ e1value = convert (TREE_TYPE (e1value),
+ TREE_OPERAND (e1value, 0));
+ mark_exp_read (e2value);
+ if (TREE_CODE (e2value) == EXCESS_PRECISION_EXPR)
+ e2value = convert (TREE_TYPE (e2value),
+ TREE_OPERAND (e2value, 0));
+ if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (e1value))
+ || DECIMAL_FLOAT_TYPE_P (TREE_TYPE (e1value))
+ || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (e2value))
+ || DECIMAL_FLOAT_TYPE_P (TREE_TYPE (e2value)))
+ {
+ error_at (loc, "%<__builtin_complex%> operand "
+ "not of real binary floating-point type");
+ expr.value = error_mark_node;
+ break;
+ }
+ if (TYPE_MAIN_VARIANT (TREE_TYPE (e1value))
+ != TYPE_MAIN_VARIANT (TREE_TYPE (e2value)))
+ {
+ error_at (loc,
+ "%<__builtin_complex%> operands of different types");
+ expr.value = error_mark_node;
+ break;
+ }
+ if (!flag_isoc99)
+ pedwarn (loc, OPT_pedantic,
+ "ISO C90 does not support complex types");
+ expr.value = build2 (COMPLEX_EXPR,
+ build_complex_type (TYPE_MAIN_VARIANT
+ (TREE_TYPE (e1value))),
+ e1value, e2value);
+ break;
+ }
+ case RID_BUILTIN_SHUFFLE:
+ {
+ VEC(tree,gc) *expr_list;
+
+ c_parser_consume_token (parser);
+ if (!c_parser_get_builtin_args (parser,
+ "__builtin_shuffle",
+ &expr_list, NULL))
+ {
+ expr.value = error_mark_node;
+ break;
+ }
+
+ if (VEC_length (tree, expr_list) == 2)
+ expr.value = c_build_vec_shuffle_expr
+ (loc, VEC_index (tree, expr_list, 0),
+ NULL_TREE,
+ VEC_index (tree, expr_list, 1));
+ else if (VEC_length (tree, expr_list) == 3)
+ expr.value = c_build_vec_shuffle_expr
+ (loc, VEC_index (tree, expr_list, 0),
+ VEC_index (tree, expr_list, 1),
+ VEC_index (tree, expr_list, 2));
+ else
+ {
+ error_at (loc, "wrong number of arguments to "
+ "%<__builtin_shuffle%>");
+ expr.value = error_mark_node;
+ }
+ break;
+ }
case RID_AT_SELECTOR:
gcc_assert (c_dialect_objc ());
c_parser_consume_token (parser);
===================================================================
@@ -231,6 +231,12 @@ (define_mode_attr sseinsnmode
(V4SF "V4SF") (V2DF "V2DF")
(TI "TI") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
+;; Map 128-bit vector modes to the integer vector mode of the shuffle mask
+(define_mode_attr sseshuffint
+ [(V16QI "V16QI") (V8HI "V8HI")
+ (V4SI "V4SI") (V2DI "V2DI")
+ (V4SF "V4SI") (V2DF "V2DI")])
+
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
[(V8SF "V8SI") (V4DF "V4DI")
@@ -6234,6 +6240,18 @@ (define_expand "vconduv2di"
DONE;
})
+(define_expand "vshuffle<mode>"
+ [(match_operand:V_128 0 "register_operand" "")
+ (match_operand:V_128 1 "general_operand" "")
+ (match_operand:<sseshuffint> 2 "general_operand" "")]
+ "TARGET_SSSE3 || TARGET_AVX"
+{
+ bool ok = ix86_expand_vshuffle (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel bitwise logical operations
===================================================================
@@ -118,6 +118,7 @@ extern bool ix86_expand_int_movcc (rtx[]
extern bool ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
+extern bool ix86_expand_vshuffle (rtx[]);
extern void ix86_expand_sse_unpack (rtx[], bool, bool);
extern bool ix86_expand_int_addcc (rtx[]);
extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
===================================================================
@@ -18693,6 +18693,93 @@ ix86_expand_int_vcond (rtx operands[])
return true;
}
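+
+/* Expand a vshuffle pattern: permute the elements of OPERANDS[1]
+   according to the index vector OPERANDS[2] and store the result in
+   OPERANDS[0].  The expansion goes through a byte shuffle (pshufb),
+   hence the SSSE3/AVX requirement.  */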
+bool
+ix86_expand_vshuffle (rtx operands[])
+{
+ rtx target = operands[0];
+ rtx op0 = operands[1];
+ rtx mask = operands[2];
+ rtx mm, vt, cv0, t1;
+ enum machine_mode mode = GET_MODE (op0);
+ enum machine_mode maskmode = GET_MODE (mask);
+  enum machine_mode maskinner = GET_MODE_INNER (maskmode);
+ rtx vec[16];
+ int w, i, j;
+
+ gcc_assert ((TARGET_SSSE3 || TARGET_AVX) && GET_MODE_BITSIZE (mode) == 128);
+
+ op0 = force_reg (mode, op0);
+ mask = force_reg (maskmode, mask);
+
+ /* Number of elements in the vector. */
+ w = GET_MODE_BITSIZE (maskmode) / GET_MODE_BITSIZE (maskinner);
+
+ /* mask = mask & {w-1, w-1, w-1,...} */
+ for (i = 0; i < w; i++)
+ vec[i] = GEN_INT (w - 1);
+
+ mm = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
+ mask = expand_simple_binop (maskmode, AND, mask, mm,
+ NULL_RTX, 0, OPTAB_DIRECT);
+
+ /* Convert mask to vector of chars. */
+ mask = simplify_gen_subreg (V16QImode, mask, maskmode, 0);
+ mask = force_reg (V16QImode, mask);
+
+  /* Build a helper mask which we will use in pshufb
+ (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
+ (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}
+ ... */
+ for (i = 0; i < w; i++)
+ for (j = 0; j < 16/w; j++)
+      vec[i*(16/w)+j] = GEN_INT (i*16/w);
+
+ vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
+ vt = force_reg (V16QImode, vt);
+
+ t1 = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ssse3_pshufbv16qi3 (t1, mask, vt));
+ mm = t1;
+
+  /* MM now contains something like
+     mm = {m[0], .., m[0], m[k], .., m[k], ... }, where
+     m[i] is an index of the element in the vector we are
+     selecting from.
+
+     Convert it into byte positions by doing
+     mm = mm * {16/w, 16/w, ...}
+     mm = mm + {0,1,..,16/w-1, 0,1,..,16/w-1, ...} */
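+  /* For example (a sketch): for V4SI and mask {1,0,3,2}, mm is
+     {1,1,1,1, 0,0,0,0, 3,3,3,3, 2,2,2,2}, and the multiply and add
+     turn it into the byte selector
+     {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}.  */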
+ for (i = 0; i < 16; i++)
+ vec[i] = GEN_INT (16/w);
+
+ cv0 = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
+ mm = expand_simple_binop (V16QImode, MULT, mm, cv0,
+ NULL_RTX, 0, OPTAB_DIRECT);
+
+ for (i = 0; i < w; i++)
+ for (j = 0; j < 16/w; j++)
+      vec[i*(16/w)+j] = GEN_INT (j);
+
+ cv0 = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
+ mm = expand_simple_binop (V16QImode, PLUS, mm, cv0,
+ NULL_RTX, 0, OPTAB_DIRECT);
+
+ t1 = gen_reg_rtx (V16QImode);
+
+ /* Convert OP0 to vector of chars. */
+ op0 = simplify_gen_subreg (V16QImode, op0, mode, 0);
+ op0 = force_reg (V16QImode, op0);
+ emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mm));
+
+ /* Convert it back from vector of chars to the original mode. */
+ t1 = simplify_gen_subreg (mode, t1, V16QImode, 0);
+
+ emit_insn (gen_rtx_SET (VOIDmode, target, t1));
+
+ return true;
+}
+
/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
true if we should do zero extension, else sign extension. HIGH_P is
true if we want the N/2 high elements, else the low elements. */
@@ -30911,6 +30998,9 @@ struct expand_vec_perm_d
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
+static int extract_vec_perm_cst (struct expand_vec_perm_d *, tree);
+static bool ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask);
+
/* Get a vector mode of the same size as the original but with elements
twice as wide. This is only guaranteed to apply to integral vectors. */
@@ -32417,7 +32507,7 @@ void ix86_emit_i387_round (rtx op0, rtx
res = gen_reg_rtx (outmode);
half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
-
+
/* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
/* scratch = fxam(op1) */
@@ -34576,10 +34666,10 @@ ix86_vectorize_builtin_vec_perm_ok (tree
vec_mask = extract_vec_perm_cst (&d, mask);
- /* This hook is cannot be called in response to something that the
- user does (unlike the builtin expander) so we shouldn't ever see
- an error generated from the extract. */
- gcc_assert (vec_mask > 0 && vec_mask <= 3);
+ /* Check whether the mask can be applied to the vector type. */
+ if (vec_mask < 0 || vec_mask > 3)
+ return false;
+
one_vec = (vec_mask != 3);
/* Implementable with shufps or pshufd. */
===================================================================
@@ -943,6 +943,7 @@ get_expr_operands (gimple stmt, tree *ex
case COND_EXPR:
case VEC_COND_EXPR:
+ case VEC_SHUFFLE_EXPR:
get_expr_operands (stmt, &TREE_OPERAND (expr, 0), uflags);
get_expr_operands (stmt, &TREE_OPERAND (expr, 1), uflags);
get_expr_operands (stmt, &TREE_OPERAND (expr, 2), uflags);