===================================================================
@@ -6561,6 +6561,32 @@ invoke undefined behavior at runtime. W
accesses for vector subscription can be enabled with
@option{-Warray-bounds}.
+Vector shuffling is available using the functions
+@code{__builtin_shuffle (vec, mask)} and
+@code{__builtin_shuffle (vec0, vec1, mask)}.  Both functions construct
+a permutation of elements from one or two vectors and return a vector
+of the same type as the input vector(s).  The mask is a vector of
+integer-typed elements.  The size of each element of the mask must be
+the same as the size of each element of the input vectors.  The number
+of elements in the input vector(s) and the mask must be the same.
+
+The elements of the input vectors are numbered from left to right across
+one or both of the vectors.  Each element in the mask specifies the
+index of the element to select from the input vector(s).  Consider the
+following example.
+
+@smallexample
+typedef int v4si __attribute__ ((vector_size (16)));
+
+v4si a = @{1,2,3,4@};
+v4si b = @{5,6,7,8@};
+v4si mask1 = @{0,1,1,3@};
+v4si mask2 = @{0,4,2,5@};
+v4si res;
+
+res = __builtin_shuffle (a, mask1); /* res is @{1,2,2,4@} */
+res = __builtin_shuffle (a, b, mask2); /* res is @{1,5,3,6@} */
+@end smallexample
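+
+For the two-vector form, with @code{N} elements per vector and mask
+indices in the range 0 to @code{2*N-1}, the semantics can be sketched
+as the following scalar loop (illustrative only, not the actual
+implementation):
+
+@smallexample
+for (i = 0; i < N; i++)
+  res[i] = mask[i] < N ? v0[mask[i]] : v1[mask[i] - N];
+@end smallexample
+
+The one-vector form behaves as if @code{v0} had been passed for both
+input vectors.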
+
You can declare variables and use them in function calls and returns, as
well as in assignments and some casts. You can specify a vector type as
a return type for a function. Vector types can also be used as function
===================================================================
@@ -2067,6 +2067,16 @@ dump_generic_node (pretty_printer *buffe
dump_generic_node (buffer, TREE_OPERAND (node, 2), spc, flags, false);
pp_string (buffer, " > ");
break;
+
+ case VEC_SHUFFLE_EXPR:
+ pp_string (buffer, " VEC_SHUFFLE_EXPR < ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (buffer, " , ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+ pp_string (buffer, " , ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 2), spc, flags, false);
+ pp_string (buffer, " > ");
+ break;
case DOT_PROD_EXPR:
pp_string (buffer, " DOT_PROD_EXPR < ");
===================================================================
@@ -425,6 +425,7 @@ const struct c_common_resword c_common_r
{ "__attribute__", RID_ATTRIBUTE, 0 },
{ "__builtin_choose_expr", RID_CHOOSE_EXPR, D_CONLY },
{ "__builtin_complex", RID_BUILTIN_COMPLEX, D_CONLY },
+ { "__builtin_shuffle", RID_BUILTIN_SHUFFLE, D_CONLY },
{ "__builtin_offsetof", RID_OFFSETOF, 0 },
{ "__builtin_types_compatible_p", RID_TYPES_COMPATIBLE_P, D_CONLY },
{ "__builtin_va_arg", RID_VA_ARG, 0 },
===================================================================
@@ -103,7 +103,7 @@ enum rid
/* C extensions */
RID_ASM, RID_TYPEOF, RID_ALIGNOF, RID_ATTRIBUTE, RID_VA_ARG,
RID_EXTENSION, RID_IMAGPART, RID_REALPART, RID_LABEL, RID_CHOOSE_EXPR,
- RID_TYPES_COMPATIBLE_P, RID_BUILTIN_COMPLEX,
+ RID_TYPES_COMPATIBLE_P, RID_BUILTIN_COMPLEX, RID_BUILTIN_SHUFFLE,
RID_DFLOAT32, RID_DFLOAT64, RID_DFLOAT128,
RID_FRACT, RID_ACCUM,
@@ -898,6 +898,7 @@ extern tree build_function_call (locatio
extern tree build_function_call_vec (location_t, tree,
VEC(tree,gc) *, VEC(tree,gc) *);
+extern tree c_build_vec_shuffle_expr (location_t, tree, tree, tree);
extern tree resolve_overloaded_builtin (location_t, tree, VEC(tree,gc) *);
===================================================================
@@ -6620,6 +6620,82 @@ vector_compare_rtx (tree cond, bool unsi
return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value);
}
+/* Return true if VEC_SHUFFLE_EXPR can be expanded using the SIMD
+   extensions of the CPU.  */
+bool
+expand_vec_shuffle_expr_p (enum machine_mode mode, tree v0,
+ tree v1, tree mask)
+{
+ int v0_mode_s = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (v0))));
+ int mask_mode_s = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (mask))));
+
+ if (TREE_CODE (mask) == VECTOR_CST
+ && targetm.vectorize.builtin_vec_perm_ok (TREE_TYPE (v0), mask))
+ return true;
+
+ if (v0 != v1 || v0_mode_s != mask_mode_s)
+ return false;
+
+ return direct_optab_handler (vshuffle_optab, mode) != CODE_FOR_nothing;
+}
+
+/* Generate instructions for VEC_SHUFFLE_EXPR given its type and three
+   operands.  */
+rtx
+expand_vec_shuffle_expr (tree type, tree v0, tree v1, tree mask, rtx target)
+{
+ struct expand_operand ops[4];
+ enum insn_code icode;
+ enum machine_mode mode = TYPE_MODE (type);
+ rtx rtx_v0, rtx_mask;
+
+ gcc_assert (expand_vec_shuffle_expr_p (mode, v0, v1, mask));
+
+ if (TREE_CODE (mask) == VECTOR_CST)
+ {
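+      /* A constant mask can be handled through the target's
+         vec_perm builtin, which may support more permutations
+         than the generic vshuffle optab.  */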
+ tree m_type, call;
+ tree fn = targetm.vectorize.builtin_vec_perm (TREE_TYPE (v0), &m_type);
+ rtx t;
+
+ if (!fn)
+ goto vshuffle;
+
+ if (m_type != TREE_TYPE (TREE_TYPE (mask)))
+ {
+ int units = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask));
+ tree cvt = build_vector_type (m_type, units);
+ mask = fold_convert (cvt, mask);
+ }
+
+ fn = copy_node (fn);
+ call = fold_build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fn)), fn);
+ call = build_call_nary (type /* ? */, call, 3, v0, v1, mask);
+
+ t = expand_normal (call);
+ target = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, target, t));
+ return target;
+ }
+
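+  /* Fall back to the vshuffle optab; this path handles only the
+     single-vector case, V0 == V1.  */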
+vshuffle:
+ gcc_assert (v1 == v0);
+
+ icode = direct_optab_handler (vshuffle_optab, mode);
+
+ if (icode == CODE_FOR_nothing)
+ return 0;
+
+ rtx_v0 = expand_normal (v0);
+ rtx_mask = expand_normal (mask);
+
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], rtx_v0, mode);
+ create_input_operand (&ops[2], rtx_mask, mode);
+ expand_insn (icode, 3, ops);
+
+ return ops[0].value;
+}
+
/* Return insn code for TYPE, the type of a VEC_COND_EXPR. */
static inline enum insn_code
===================================================================
@@ -636,6 +636,9 @@ enum direct_optab_index
DOI_vcond,
DOI_vcondu,
+ /* Vector shuffling. */
+ DOI_vshuffle,
+
/* Block move operation. */
DOI_movmem,
@@ -701,6 +704,7 @@ typedef struct direct_optab_d *direct_op
#define reload_out_optab (&direct_optab_table[(int) DOI_reload_out])
#define vcond_optab (&direct_optab_table[(int) DOI_vcond])
#define vcondu_optab (&direct_optab_table[(int) DOI_vcondu])
+#define vshuffle_optab (&direct_optab_table[(int) DOI_vshuffle])
#define movmem_optab (&direct_optab_table[(int) DOI_movmem])
#define setmem_optab (&direct_optab_table[(int) DOI_setmem])
#define cmpstr_optab (&direct_optab_table[(int) DOI_cmpstr])
@@ -879,8 +883,15 @@ extern rtx expand_widening_mult (enum ma
/* Return tree if target supports vector operations for COND_EXPR. */
bool expand_vec_cond_expr_p (tree, enum machine_mode);
+/* Return true if the target supports vector operations for VEC_SHUFFLE_EXPR.  */
+bool expand_vec_shuffle_expr_p (enum machine_mode, tree, tree, tree);
+
/* Generate code for VEC_COND_EXPR. */
extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
+
+/* Generate code for VEC_SHUFFLE_EXPR. */
+extern rtx expand_vec_shuffle_expr (tree, tree, tree, tree, rtx);
+
/* Generate code for VEC_LSHIFT_EXPR and VEC_RSHIFT_EXPR. */
extern rtx expand_vec_shift_expr (sepops, rtx);
===================================================================
@@ -255,6 +255,7 @@ static const char * const optabs[] =
"set_optab_handler (vec_realign_load_optab, $A, CODE_FOR_$(vec_realign_load_$a$))",
"set_direct_optab_handler (vcond_optab, $A, CODE_FOR_$(vcond$a$))",
"set_direct_optab_handler (vcondu_optab, $A, CODE_FOR_$(vcondu$a$))",
+ "set_direct_optab_handler (vshuffle_optab, $A, CODE_FOR_$(vshuffle$a$))",
"set_optab_handler (ssum_widen_optab, $A, CODE_FOR_$(widen_ssum$I$a3$))",
"set_optab_handler (usum_widen_optab, $A, CODE_FOR_$(widen_usum$I$a3$))",
"set_optab_handler (udot_prod_optab, $A, CODE_FOR_$(udot_prod$I$a$))",
===================================================================
@@ -0,0 +1,44 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + (idx)))
+
+#define shuf2compare(type, count, vres, v0, v1, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != ((vidx(type, mask, __i) < count) ? \
+ vidx(type, v0, vidx(type, mask, __i)) : \
+ vidx(type, v1, (vidx(type, mask, __i) - count)))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {5, 5,5,5,5,5,argc,7};
+ vector (8, short) v1 = {argc, 1,8,8,4,9,argc,4};
+ vector (8, short) v2;
+
+ vector (8, short) mask0 = {0,2,3,1,4,5,6,7};
+ vector (8, short) mask1 = {0,12,3,4,3,0,10,9};
+
+ vector (8, short) mask2 = {0,8,1,9,2,10,3,11};
+
+ v2 = __builtin_shuffle (v0, v1, mask0);
+ shuf2compare (short, 8, v2, v0, v1, mask0);
+
+ v2 = __builtin_shuffle (v0, v1, mask1);
+ shuf2compare (short, 8, v2, v0, v1, mask1);
+
+ v2 = __builtin_shuffle (v0, v1, mask2);
+ shuf2compare (short, 8, v2, v0, v1, mask2);
+
+ v2 = __builtin_shuffle (mask0, mask0, v0);
+ shuf2compare (short, 8, v2, mask0, mask0, v0);
+
+ return 0;
+}
===================================================================
@@ -0,0 +1,50 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + (idx)))
+
+#define shuf2compare(type, count, vres, v0, v1, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != ((vidx(type, mask, __i) < count) ? \
+ vidx(type, v0, vidx(type, mask, __i)) : \
+ vidx(type, v1, (vidx(type, mask, __i) - count)))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+vector (8, short) __attribute__ ((noinline))
+f (vector (8, short) x, vector (8, short) y, vector (8, short) mask) {
+ return __builtin_shuffle (x, y, mask);
+}
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {argc, 1,2,3,4,5,6,7};
+ vector (8, short) v1 = {argc, 1,argc,3,4,5,argc,7};
+ vector (8, short) v2;
+
+ vector (8, short) mask0 = {0,2,3,1,4,5,6,7};
+ vector (8, short) mask1 = {0,12,3,4,3,0,10,9};
+ vector (8, short) mask2 = {0,8,1,9,2,10,3,11};
+
+ v2 = f (v0, v1, mask0);
+ shuf2compare (short, 8, v2, v0, v1, mask0);
+
+ v2 = f (v0, v1, mask1);
+ shuf2compare (short, 8, v2, v0, v1, mask1);
+
+ v2 = f (v0, v1, mask2);
+ shuf2compare (short, 8, v2, v0, v1, mask2);
+
+ v2 = f (mask0, mask0, v0);
+ shuf2compare (short, 8, v2, mask0, mask0, v0);
+
+ return 0;
+}
===================================================================
@@ -0,0 +1,46 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + (idx)))
+
+#define shufcompare(type, count, vres, v0, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != vidx(type, v0, vidx(type, mask, __i))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+int main (int argc, char *argv[]) {
+ vector (4, int) i0 = {argc, 1,2,3};
+ vector (4, int) i1 = {argc, 1, argc, 3};
+ vector (4, int) i2;
+
+ vector (4, int) imask = {0,3,2,1};
+
+ i2 = __builtin_shuffle (i0, i1);
+ shufcompare (int, 4, i2, i0, i1);
+
+ i2 = __builtin_shuffle (imask, i0);
+ shufcompare (int, 4, i2, imask, i0);
+
+ return 0;
+}
===================================================================
@@ -0,0 +1,36 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + (idx)))
+
+#define shufcompare(type, count, vres, v0, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != vidx(type, v0, vidx(type, mask, __i))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+vector (8, short) __attribute__ ((noinline))
+f (vector (8, short) x, vector (8, short) mask) {
+ return __builtin_shuffle (x, mask);
+}
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {argc, 1,2,3,4,5,6,7};
+ vector (8, short) v1 = {argc, 1,argc,3,4,5,argc,7};
+ vector (8, short) v2;
+
+ vector (8, short) mask = {0,0,1,2,3,4,5,6};
+
+ v2 = f (v0, mask);
+ shufcompare (short, 8, v2, v0, mask);
+
+ v2 = f (v0, v1);
+ shufcompare (short, 8, v2, v0, v1);
+
+ return 0;
+}
===================================================================
@@ -8605,6 +8605,10 @@ expand_expr_real_2 (sepops ops, rtx targ
case VEC_PACK_FIX_TRUNC_EXPR:
mode = TYPE_MODE (TREE_TYPE (treeop0));
goto binop;
+
+ case VEC_SHUFFLE_EXPR:
+      target = expand_vec_shuffle_expr (type, treeop0, treeop1,
+                                        treeop2, target);
+ return target;
case DOT_PROD_EXPR:
{
===================================================================
@@ -417,6 +417,16 @@ dump_ternary_rhs (pretty_printer *buffer
dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
pp_string (buffer, ">");
break;
+
+ case VEC_SHUFFLE_EXPR:
+ pp_string (buffer, "VEC_SHUFFLE_EXPR <");
+ dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
+ pp_string (buffer, ">");
+ break;
case REALIGN_LOAD_EXPR:
pp_string (buffer, "REALIGN_LOAD <");
===================================================================
@@ -2845,6 +2845,89 @@ build_function_call_vec (location_t loc,
}
return require_complete_type (result);
}
+
+/* Build a VEC_SHUFFLE_EXPR if V0, V1 and MASK are not error_mark_nodes
+   and have vector types, V0 has the same type as V1, and the number of
+   elements of V0, V1 and MASK is the same.  */
+tree
+c_build_vec_shuffle_expr (location_t loc, tree v0, tree v1, tree mask)
+{
+ tree vec_shuffle, tmp;
+ bool wrap = true;
+ bool maybe_const = false;
+ bool two_arguments = v0 == v1;
+
+ if (v0 == error_mark_node || v1 == error_mark_node
+ || mask == error_mark_node)
+ return error_mark_node;
+
+ if (TREE_CODE (TREE_TYPE (mask)) != VECTOR_TYPE
+ || TREE_CODE (TREE_TYPE (TREE_TYPE (mask))) != INTEGER_TYPE)
+ {
+ error_at (loc, "__builtin_shuffle last argument must "
+ "be an integer vector");
+ return error_mark_node;
+ }
+
+ if (TREE_CODE (TREE_TYPE (v0)) != VECTOR_TYPE
+ || TREE_CODE (TREE_TYPE (v1)) != VECTOR_TYPE)
+ {
+ error_at (loc, "__builtin_shuffle arguments must be vectors");
+ return error_mark_node;
+ }
+
+ if (TREE_TYPE (v0) != TREE_TYPE (v1))
+ {
+ error_at (loc, "__builtin_shuffle argument vectors must be of "
+ "the same type");
+ return error_mark_node;
+ }
+
+ if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (v0))
+ != TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask))
+ && TYPE_VECTOR_SUBPARTS (TREE_TYPE (v1))
+ != TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)))
+ {
+ error_at (loc, "__builtin_shuffle number of elements of the "
+ "argument vector(s) and the mask vector should "
+ "be the same");
+ return error_mark_node;
+ }
+
+ if (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (v0))))
+ != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (mask)))))
+ {
+ error_at (loc, "__builtin_shuffle argument vector(s) inner type "
+ "must have the same size as inner type of the mask");
+ return error_mark_node;
+ }
+
+ /* Avoid C_MAYBE_CONST_EXPRs inside VEC_SHUFFLE_EXPR. */
+ tmp = c_fully_fold (v0, false, &maybe_const);
+ v0 = save_expr (tmp);
+ wrap &= maybe_const;
+
+ if (!two_arguments)
+ {
+ tmp = c_fully_fold (v1, false, &maybe_const);
+ v1 = save_expr (tmp);
+ wrap &= maybe_const;
+ }
+ else
+ v1 = v0;
+
+ tmp = c_fully_fold (mask, false, &maybe_const);
+ mask = save_expr (tmp);
+ wrap &= maybe_const;
+
+ vec_shuffle = build3 (VEC_SHUFFLE_EXPR, TREE_TYPE (v0), v0, v1, mask);
+
+ if (!wrap)
+ vec_shuffle = c_wrap_maybe_const (vec_shuffle, true);
+
+ return vec_shuffle;
+}
+
/* Convert the argument expressions in the vector VALUES
to the types in the list TYPELIST.
@@ -6120,7 +6203,14 @@ digest_init (location_t init_loc, tree t
tree value;
bool constant_p = true;
- /* Iterate through elements and check if all constructor
+	/* Warn if the constructor has fewer elements than the vector type.  */
+ if (CONSTRUCTOR_NELTS (inside_init)
+ < TYPE_VECTOR_SUBPARTS (TREE_TYPE (inside_init)))
+ warning_at (init_loc, 0, "vector length does not match "
+ "initializer length, zero elements "
+ "will be inserted");
+
+ /* Iterate through elements and check if all constructor
elements are *_CSTs. */
FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (inside_init), ix, value)
if (!CONSTANT_CLASS_P (value))
===================================================================
@@ -7053,6 +7053,32 @@ gimplify_expr (tree *expr_p, gimple_seq
}
break;
+ case VEC_SHUFFLE_EXPR:
+ {
+ enum gimplify_status r0, r1, r2;
+
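+        /* If both input operands are the same tree node, gimplify it
+           only once and reuse the result for the second operand.  */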
+ if (TREE_OPERAND (*expr_p, 0) == TREE_OPERAND (*expr_p, 1))
+ {
+ r0 = r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+ TREE_OPERAND (*expr_p, 1) = TREE_OPERAND (*expr_p, 0);
+ }
+ else
+ {
+ r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+ r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+ }
+
+ r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+ recalculate_side_effects (*expr_p);
+
+ ret = MIN (r0, MIN (r1, r2));
+ break;
+ }
+
case TARGET_MEM_REF:
{
enum gimplify_status r0 = GS_ALL_DONE, r1 = GS_ALL_DONE;
===================================================================
@@ -497,6 +497,19 @@ DEFTREECODE (COND_EXPR, "cond_expr", tcc
*/
DEFTREECODE (VEC_COND_EXPR, "vec_cond_expr", tcc_expression, 3)
+/* Vector shuffle expression.  A = VEC_SHUFFLE_EXPR<v0, v1, mask>
+   means
+
+   foreach i in length (mask):
+     A[i] = mask[i] < length (v0) ? v0[mask[i]] : v1[mask[i] - length (v0)]
+
+   V0 and V1 are vectors of the same type.  MASK is an integer-typed
+   vector.  The number of MASK elements must be the same as the
+   number of elements in V0 and V1.  The size of the inner type
+   of MASK must be the same as the size of the inner type of
+   V0 and V1.  */
+DEFTREECODE (VEC_SHUFFLE_EXPR, "vec_shuffle_expr", tcc_expression, 3)
+
/* Declare local variables, including making RTL and allocating space.
BIND_EXPR_VARS is a chain of VAR_DECL nodes for the variables.
BIND_EXPR_BODY is the body, the expression to be computed using
===================================================================
@@ -3285,6 +3285,7 @@ estimate_operator_cost (enum tree_code c
??? We may consider mapping RTL costs to this. */
case COND_EXPR:
case VEC_COND_EXPR:
+ case VEC_SHUFFLE_EXPR:
case PLUS_EXPR:
case POINTER_PLUS_EXPR:
===================================================================
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
+#include "diagnostic.h"
/* Need to include rtl.h, expr.h, etc. for optabs. */
#include "expr.h"
@@ -432,6 +433,279 @@ type_for_widest_vector_mode (enum machin
}
}
+
+/* Build a reference to the element of the vector VECT.  Function
+   returns either the element itself, a BIT_FIELD_REF, or an
+   ARRAY_REF expression.
+
+   GSI is required to insert temporary variables while building a
+   reference to the element of the vector VECT.
+
+   PTMPVEC is a pointer to a temporary variable for caching
+   purposes.  If PTMPVEC is NULL, a new temporary variable
+   will be created.  */
+static tree
+vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
+{
+ tree type;
+ gimple asgn;
+ unsigned HOST_WIDE_INT maxval;
+ tree tmpvec;
+ tree indextype, arraytype;
+ bool need_asgn = true;
+
+ gcc_assert (TREE_CODE (TREE_TYPE (vect)) == VECTOR_TYPE);
+
+ type = TREE_TYPE (vect);
+ if (TREE_CODE (idx) == INTEGER_CST)
+ {
+ unsigned HOST_WIDE_INT index;
+
+ if (!host_integerp (idx, 1)
+ || (index = tree_low_cst (idx, 1)) > TYPE_VECTOR_SUBPARTS (type)-1)
+ return error_mark_node;
+
+ if (TREE_CODE (vect) == VECTOR_CST)
+ {
+ unsigned i;
+ tree vals = TREE_VECTOR_CST_ELTS (vect);
+ for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
+ if (i == index)
+ return TREE_VALUE (vals);
+ return error_mark_node;
+ }
+ else if (TREE_CODE (vect) == CONSTRUCTOR)
+ {
+ unsigned i;
+ VEC (constructor_elt, gc) *vals = CONSTRUCTOR_ELTS (vect);
+ constructor_elt *elt;
+
+ for (i = 0; VEC_iterate (constructor_elt, vals, i, elt); i++)
+ if (operand_equal_p (elt->index, idx, 0))
+ return elt->value;
+ return fold_convert (TREE_TYPE (type), integer_zero_node);
+ }
+ else if (TREE_CODE (vect) == SSA_NAME)
+ {
+ tree el;
+ gimple vectdef = SSA_NAME_DEF_STMT (vect);
+ if (gimple_assign_single_p (vectdef)
+ && (el = vector_element (gsi, gimple_assign_rhs1 (vectdef),
+ idx, ptmpvec))
+ != error_mark_node)
+ return el;
+ else
+ {
+ tree size = TYPE_SIZE (TREE_TYPE (type));
+ tree pos = fold_build2 (MULT_EXPR, TREE_TYPE (idx),
+ idx, size);
+ return fold_build3 (BIT_FIELD_REF, TREE_TYPE (type),
+ vect, size, pos);
+ }
+ }
+ else
+ return error_mark_node;
+ }
+
+ if (!ptmpvec)
+ tmpvec = create_tmp_var (TREE_TYPE (vect), "vectmp");
+ else if (!*ptmpvec)
+ tmpvec = *ptmpvec = create_tmp_var (TREE_TYPE (vect), "vectmp");
+ else
+ {
+ tmpvec = *ptmpvec;
+ need_asgn = false;
+ }
+
+ if (need_asgn)
+ {
+ TREE_ADDRESSABLE (tmpvec) = 1;
+ asgn = gimple_build_assign (tmpvec, vect);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+ }
+
+ maxval = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vect)) -1;
+ indextype = build_index_type (size_int (maxval));
+ arraytype = build_array_type (TREE_TYPE (type), indextype);
+
+ return build4 (ARRAY_REF, TREE_TYPE (type),
+ build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
+ idx, NULL_TREE, NULL_TREE);
+}
+
+/* Check if VEC_SHUFFLE_EXPR within the given setting is supported
+   by hardware, or lower it piecewise.
+
+   When VEC_SHUFFLE_EXPR has the same first and second operands,
+   VEC_SHUFFLE_EXPR <v0, v0, mask> is lowered to
+   {v0[mask[0]], v0[mask[1]], ...}.
+   MASK and V0 must have the same number of elements.
+
+   Otherwise VEC_SHUFFLE_EXPR <v0, v1, mask> is lowered to
+   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0] - len(v0)], ...}.
+   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
+   same number of elements.  */
+static void
+lower_vec_shuffle (gimple_stmt_iterator *gsi, location_t loc)
+{
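+  /* Emit a runtime trap for an invalid shuffle and replace the
+     statement with a plain copy of VEC0 so that the IL stays valid.  */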
+#define TRAP_RETURN(new_stmt, stmt, gsi, vec0) \
+do { \
+ new_stmt = gimple_build_call (built_in_decls[BUILT_IN_TRAP], 0); \
+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); \
+ split_block (gimple_bb (new_stmt), new_stmt); \
+  new_stmt = gimple_build_assign (gimple_assign_lhs (stmt), vec0); \
+ gsi_replace (gsi, new_stmt, false); \
+ return; \
+} while (0)
+
+ gimple stmt = gsi_stmt (*gsi);
+ tree mask = gimple_assign_rhs3 (stmt);
+ tree vec0 = gimple_assign_rhs1 (stmt);
+ tree vec1 = gimple_assign_rhs2 (stmt);
+ unsigned els = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask));
+ tree type0 = TREE_TYPE (TREE_TYPE (vec0));
+ VEC(constructor_elt,gc) *v = NULL;
+ tree vectype, constr;
+ gimple new_stmt;
+ tree vec0tmp = NULL_TREE, masktmp = NULL_TREE;
+
+ if (expand_vec_shuffle_expr_p (TYPE_MODE (TREE_TYPE (vec0)), vec0, vec1, mask))
+ {
+ tree t;
+
+ t = gimplify_build3 (gsi, VEC_SHUFFLE_EXPR, TREE_TYPE (vec0),
+ vec0, vec1, mask);
+ gimple_assign_set_rhs_from_tree (gsi, t);
+ /* Statement should be updated by callee. */
+ return;
+ }
+
+ if (vec0 == vec1)
+ {
+      unsigned i;
+
+ v = VEC_alloc (constructor_elt, gc, els);
+ for (i = 0; i < els; i++)
+ {
+ tree idxval, vecel, t;
+
+ idxval = vector_element (gsi, mask, size_int (i), &masktmp);
+ if (idxval == error_mark_node)
+ {
+              warning_at (loc, 0, "invalid shuffling mask index %i", i);
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ vecel = vector_element (gsi, vec0, idxval, &vec0tmp);
+ if (vecel == error_mark_node)
+ {
+              warning_at (loc, 0, "invalid shuffling arguments");
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ t = force_gimple_operand_gsi (gsi, vecel, true,
+ NULL_TREE, true, GSI_SAME_STMT);
+ CONSTRUCTOR_APPEND_ELT (v, size_int (i), t);
+ }
+ }
+ else
+ {
+ unsigned i;
+ tree var = create_tmp_var (type0, "vecel");
+ tree vec1tmp = NULL_TREE;
+
+ v = VEC_alloc (constructor_elt, gc, els);
+ for (i = 0; i < els; i++)
+ {
+ tree idxval, idx1val, cond, elval0, elval1, condexpr, t, ssatmp;
+ tree vec0el, vec1el;
+ gimple asgn;
+
+ idxval = vector_element (gsi, mask, size_int (i), &masktmp);
+ if (idxval == error_mark_node)
+ {
+              warning_at (loc, 0, "invalid shuffling mask index %i", i);
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ if (TREE_CODE (idxval) == INTEGER_CST)
+ {
+ if (tree_int_cst_lt (idxval, size_int (els)))
+ {
+ vec0el = vector_element (gsi, vec0, idxval, &vec0tmp);
+ t = force_gimple_operand_gsi (gsi, vec0el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ }
+ else if (tree_int_cst_lt (idxval, size_int (2*els)))
+ {
+ idx1val = fold_build2 (MINUS_EXPR, TREE_TYPE (idxval),
+ idxval, build_int_cst (TREE_TYPE (idxval), els));
+
+ vec1el = vector_element (gsi, vec1, idx1val, &vec1tmp);
+ t = force_gimple_operand_gsi (gsi, vec1el, true,
+ NULL_TREE, true, GSI_SAME_STMT);
+ }
+ else
+ {
+                  warning_at (loc, 0, "invalid shuffling mask index %i", i);
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+ }
+ else
+ {
+ idx1val = fold_build2 (MINUS_EXPR, TREE_TYPE (idxval),
+ idxval, build_int_cst (TREE_TYPE (idxval), els));
+ idx1val = force_gimple_operand_gsi (gsi, idx1val,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+              cond = build2 (GT_EXPR, boolean_type_node, idxval,
+                             build_int_cst (TREE_TYPE (idxval), els - 1));
+
+ vec0el = vector_element (gsi, vec0, idxval, &vec0tmp);
+ if (vec0el == error_mark_node)
+ {
+                  warning_at (loc, 0, "invalid shuffling arguments");
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ elval0 = force_gimple_operand_gsi (gsi, vec0el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+
+ vec1el = vector_element (gsi, vec1, idx1val, &vec1tmp);
+ if (vec1el == error_mark_node)
+ {
+                  warning_at (loc, 0, "invalid shuffling arguments");
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ elval1 = force_gimple_operand_gsi (gsi, vec1el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+
+              condexpr = fold_build3 (COND_EXPR, type0, cond,
+                                      elval1, elval0);
+
+              t = force_gimple_operand_gsi (gsi, condexpr, true,
+                                            NULL_TREE, true, GSI_SAME_STMT);
+ }
+
+ asgn = gimple_build_assign (var, t);
+ ssatmp = make_ssa_name (var, asgn);
+ gimple_assign_set_lhs (asgn, ssatmp);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+ CONSTRUCTOR_APPEND_ELT (v, size_int (i), ssatmp);
+ }
+ }
+
+ vectype = build_vector_type (type0, els);
+ constr = build_constructor (vectype, v);
+ gimple_assign_set_rhs_from_tree (gsi, constr);
+ /* Statement should be updated by callee. */
+}
+
/* Process one statement. If we identify a vector operation, expand it. */
static void
@@ -451,6 +725,13 @@ expand_vector_operations_1 (gimple_stmt_
code = gimple_assign_rhs_code (stmt);
rhs_class = get_gimple_rhs_class (code);
+  if (code == VEC_SHUFFLE_EXPR)
+    {
+      lower_vec_shuffle (gsi, gimple_location (stmt));
+      gimple_set_modified (gsi_stmt (*gsi), true);
+      update_stmt (gsi_stmt (*gsi));
+      return;
+    }
+
if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
return;
@@ -612,10 +893,11 @@ expand_vector_operations_1 (gimple_stmt_
/* Use this to lower vector operations introduced by the vectorizer,
if it may need the bit-twiddling tricks implemented in this file. */
+
static bool
-gate_expand_vector_operations (void)
+gate_expand_vector_operations_noop (void)
{
- return flag_tree_vectorize != 0;
+ return optimize == 0;
}
static unsigned int
@@ -648,7 +930,7 @@ struct gimple_opt_pass pass_lower_vector
{
GIMPLE_PASS,
"veclower", /* name */
- 0, /* gate */
+ gate_expand_vector_operations_noop, /* gate */
expand_vector_operations, /* execute */
NULL, /* sub */
NULL, /* next */
@@ -660,7 +942,8 @@ struct gimple_opt_pass pass_lower_vector
0, /* todo_flags_start */
TODO_update_ssa /* todo_flags_finish */
| TODO_verify_ssa
- | TODO_verify_stmts | TODO_verify_flow
+ | TODO_verify_stmts | TODO_verify_flow
+ | TODO_cleanup_cfg
}
};
@@ -669,7 +952,7 @@ struct gimple_opt_pass pass_lower_vector
{
GIMPLE_PASS,
"veclower2", /* name */
- gate_expand_vector_operations, /* gate */
+ 0, /* gate */
expand_vector_operations, /* execute */
NULL, /* sub */
NULL, /* next */
@@ -682,6 +965,7 @@ struct gimple_opt_pass pass_lower_vector
TODO_update_ssa /* todo_flags_finish */
| TODO_verify_ssa
| TODO_verify_stmts | TODO_verify_flow
+ | TODO_cleanup_cfg
}
};
===================================================================
@@ -2615,6 +2615,7 @@ get_gimple_rhs_num_ops (enum tree_code c
|| (SYM) == WIDEN_MULT_MINUS_EXPR \
|| (SYM) == DOT_PROD_EXPR \
|| (SYM) == REALIGN_LOAD_EXPR \
+ || (SYM) == VEC_SHUFFLE_EXPR \
|| (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \
: ((SYM) == COND_EXPR \
|| (SYM) == CONSTRUCTOR \
===================================================================
@@ -3713,6 +3713,7 @@ verify_gimple_assign_ternary (gimple stm
case DOT_PROD_EXPR:
case REALIGN_LOAD_EXPR:
+ case VEC_SHUFFLE_EXPR:
/* FIXME. */
return false;
===================================================================
@@ -1354,7 +1354,6 @@ init_optimization_passes (void)
NEXT_PASS (pass_vectorize);
{
struct opt_pass **p = &pass_vectorize.pass.sub;
- NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_dce_loop);
}
NEXT_PASS (pass_predcom);
@@ -1366,6 +1365,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_lim);
NEXT_PASS (pass_tree_loop_done);
}
+ NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_cse_reciprocals);
NEXT_PASS (pass_reassoc);
NEXT_PASS (pass_vrp);
===================================================================
@@ -6027,6 +6027,10 @@ c_parser_alignof_expression (c_parser *p
assignment-expression )
__builtin_types_compatible_p ( type-name , type-name )
__builtin_complex ( assignment-expression , assignment-expression )
+ __builtin_shuffle ( assignment-expression , assignment-expression )
+ __builtin_shuffle ( assignment-expression ,
+ assignment-expression ,
+                         assignment-expression )
offsetof-member-designator:
identifier
@@ -6461,6 +6465,43 @@ c_parser_postfix_expression (c_parser *p
(TREE_TYPE (e1.value))),
e1.value, e2.value);
break;
+ case RID_BUILTIN_SHUFFLE:
+ {
+ VEC(tree,gc) *expr_list;
+
+ c_parser_consume_token (parser);
+ if (!c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>"))
+ {
+ expr.value = error_mark_node;
+ break;
+ }
+ loc = c_parser_peek_token (parser)->location;
+
+ expr_list = c_parser_expr_list (parser, false, false, NULL);
+
+ if (!c_parser_require (parser, CPP_CLOSE_PAREN, "expected %<)%>"))
+ {
+ expr.value = error_mark_node;
+ break;
+ }
+
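+      /* The two-argument form __builtin_shuffle (vec, mask) shuffles
+         VEC with itself, so VEC is passed for both input operands.  */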
+ if (VEC_length (tree, expr_list) == 2)
+ expr.value = c_build_vec_shuffle_expr
+ (loc, VEC_index (tree, expr_list, 0),
+ VEC_index (tree, expr_list, 0),
+ VEC_index (tree, expr_list, 1));
+ else if (VEC_length (tree, expr_list) == 3)
+ expr.value = c_build_vec_shuffle_expr
+ (loc, VEC_index (tree, expr_list, 0),
+ VEC_index (tree, expr_list, 1),
+ VEC_index (tree, expr_list, 2));
+ else
+ {
+ error_at (loc, "%<__builtin_shuffle%> wrong number of arguments");
+ expr.value = error_mark_node;
+ }
+ break;
+ }
case RID_AT_SELECTOR:
gcc_assert (c_dialect_objc ());
c_parser_consume_token (parser);
===================================================================
@@ -231,6 +231,12 @@ (define_mode_attr sseinsnmode
(V4SF "V4SF") (V2DF "V2DF")
(TI "TI") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
+;; Mapping of a 128-bit vector mode to the integer vector mode of
+;; the same size, used as the mode of the shuffle mask
+(define_mode_attr sseshuffint
+ [(V16QI "V16QI") (V8HI "V8HI")
+ (V4SI "V4SI") (V2DI "V2DI")
+ (V4SF "V4SI") (V2DF "V2DI")])
+
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
[(V8SF "V8SI") (V4DF "V4DI")
@@ -6234,6 +6240,18 @@ (define_expand "vconduv2di"
DONE;
})
+(define_expand "vshuffle<mode>"
+ [(match_operand:V_128 0 "register_operand" "")
+ (match_operand:V_128 1 "general_operand" "")
+ (match_operand:<sseshuffint> 2 "general_operand" "")]
+ "TARGET_SSE3 || TARGET_AVX"
+{
+ bool ok = ix86_expand_vshuffle (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel bitwise logical operations
===================================================================
@@ -118,6 +118,7 @@ extern bool ix86_expand_int_movcc (rtx[]
extern bool ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
+extern bool ix86_expand_vshuffle (rtx[]);
extern void ix86_expand_sse_unpack (rtx[], bool, bool);
extern bool ix86_expand_int_addcc (rtx[]);
extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
===================================================================
@@ -18693,6 +18693,96 @@ ix86_expand_int_vcond (rtx operands[])
return true;
}
+bool
+ix86_expand_vshuffle (rtx operands[])
+{
+ rtx target = operands[0];
+ rtx op0 = operands[1];
+ rtx mask = operands[2];
+ rtx mm, vt, cv0, t1;
+ enum machine_mode mode = GET_MODE (op0);
+ enum machine_mode maskmode = GET_MODE (mask);
+  enum machine_mode maskinner = GET_MODE_INNER (maskmode);
+ rtx vec[16];
+ int w, i, j;
+
+  gcc_assert ((TARGET_SSSE3 || TARGET_AVX) && GET_MODE_BITSIZE (mode) == 128);
+
+ op0 = force_reg (mode, op0);
+ mask = force_reg (maskmode, mask);
+
+ /* Number of elements in the vector. */
+ w = GET_MODE_BITSIZE (maskmode) / GET_MODE_BITSIZE (maskinner);
+
+ /* mask = mask & {w-1, w-1, w-1,...} */
+ for (i = 0; i < w; i++)
+ vec[i] = GEN_INT (w - 1);
+
+ mm = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
+ mm = force_reg (maskmode, mm);
+
+ mask = gen_rtx_AND (maskmode, mask, mm);
+
+ /* Convert mask to vector of chars. */
+ mask = simplify_gen_subreg (V16QImode, mask, maskmode, 0);
+ mask = force_reg (V16QImode, mask);
+
+  /* Build a helper mask which we will use in pshufb
+     (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
+     (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}
+     ... */
+  for (i = 0; i < w; i++)
+    for (j = 0; j < 16/w; j++)
+      vec[i*(16/w) + j] = GEN_INT (i*16/w);
+
+ vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
+ vt = force_reg (V16QImode, vt);
+
+ t1 = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ssse3_pshufbv16qi3 (t1, mask, vt));
+ mm = t1;
+
+  /* MM now contains something like
+ mm = {m[0], .., m[0], m[k], .., m[k], ... }, where
+ m[i] is an index of the element in the vector we are
+ selecting from.
+
+ Convert it into the byte positions by doing
+ mm = mm * {16/w, 16/w, ...}
+ mm = mm + {0,1,..,16/w, 0,1,..,16/w, ...} */
+ for (i = 0; i < 16; i++)
+ vec[i] = GEN_INT (16/w);
+
+ cv0 = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
+ cv0 = force_reg (V16QImode, cv0);
+ mm = gen_rtx_MULT (V16QImode, mm, cv0);
+
+ for (i = 0; i < w; i++)
+ for (j = 0; j < 16/w; j++)
+ vec[i*w+j] = GEN_INT (j);
+
+ cv0 = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
+ cv0 = force_reg (V16QImode, cv0);
+ mm = gen_rtx_PLUS (V16QImode, mm, cv0);
+ mm = force_reg (V16QImode, mm);
+
+ t1 = gen_reg_rtx (V16QImode);
+
+ /* Convert OP0 to vector of chars. */
+ op0 = simplify_gen_subreg (V16QImode, op0, mode, 0);
+ op0 = force_reg (V16QImode, op0);
+ emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mm));
+
+ /* Convert it back from vector of chars to the original mode. */
+ t1 = simplify_gen_subreg (mode, t1, V16QImode, 0);
+
+ emit_insn (gen_rtx_SET (VOIDmode, target, t1));
+
+ return true;
+}
+
/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
true if we should do zero extension, else sign extension. HIGH_P is
true if we want the N/2 high elements, else the low elements. */
@@ -30911,6 +31001,9 @@ struct expand_vec_perm_d
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
+static int extract_vec_perm_cst (struct expand_vec_perm_d *, tree);
+static bool ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask);
+
/* Get a vector mode of the same size as the original but with elements
twice as wide. This is only guaranteed to apply to integral vectors. */
@@ -34576,10 +34669,10 @@ ix86_vectorize_builtin_vec_perm_ok (tree
vec_mask = extract_vec_perm_cst (&d, mask);
- /* This hook is cannot be called in response to something that the
- user does (unlike the builtin expander) so we shouldn't ever see
- an error generated from the extract. */
- gcc_assert (vec_mask > 0 && vec_mask <= 3);
+ /* Check whether the mask can be applied to the vector type. */
+ if (vec_mask < 0 || vec_mask > 3)
+ return false;
+
one_vec = (vec_mask != 3);
/* Implementable with shufps or pshufd. */
===================================================================
@@ -943,6 +943,7 @@ get_expr_operands (gimple stmt, tree *ex
case COND_EXPR:
case VEC_COND_EXPR:
+ case VEC_SHUFFLE_EXPR:
get_expr_operands (stmt, &TREE_OPERAND (expr, 0), uflags);
get_expr_operands (stmt, &TREE_OPERAND (expr, 1), uflags);
get_expr_operands (stmt, &TREE_OPERAND (expr, 2), uflags);