From patchwork Mon Aug 9 11:02:23 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Biener X-Patchwork-Id: 61264 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 3EA21B70CE for ; Mon, 9 Aug 2010 21:02:43 +1000 (EST) Received: (qmail 20015 invoked by alias); 9 Aug 2010 11:02:38 -0000 Received: (qmail 19663 invoked by uid 22791); 9 Aug 2010 11:02:33 -0000 X-SWARE-Spam-Status: No, hits=-1.8 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, TW_FG, TW_TM X-Spam-Check-By: sourceware.org Received: from mail-gx0-f175.google.com (HELO mail-gx0-f175.google.com) (209.85.161.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Mon, 09 Aug 2010 11:02:25 +0000 Received: by gxk2 with SMTP id 2so3758543gxk.20 for ; Mon, 09 Aug 2010 04:02:23 -0700 (PDT) MIME-Version: 1.0 Received: by 10.100.29.28 with SMTP id c28mr16534464anc.227.1281351743315; Mon, 09 Aug 2010 04:02:23 -0700 (PDT) Received: by 10.231.199.134 with HTTP; Mon, 9 Aug 2010 04:02:23 -0700 (PDT) In-Reply-To: References: <4C34E6CF.4030608@redhat.com> <4C3607AD.50406@redhat.com> <4C3630BD.3040807@redhat.com> Date: Mon, 9 Aug 2010 13:02:23 +0200 Message-ID: Subject: Re: [patch] Support vectorization of min/max location pattern - take 2 From: Richard Guenther To: Ira Rosen Cc: gcc-patches@gcc.gnu.org, Richard Henderson X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On Mon, Aug 9, 2010 at 1:00 PM, Richard Guenther Missing attachment. 
Index: gcc/targhooks.c =================================================================== --- gcc/targhooks.c (revision 162841) +++ gcc/targhooks.c (working copy) @@ -954,6 +954,15 @@ default_builtin_vector_alignment_reachab return true; } +/* Default implementation of the builtin_vec_compare hook: provide no + target-specific sequence and return NULL_TREE. */ +tree +default_builtin_vec_compare (gimple_stmt_iterator *gsi, tree type, tree v0, + tree v1, enum tree_code code) +{ + return NULL_TREE; +} + /* By default, assume that a target supports any factor of misalignment memory access if it supports movmisalign patten. is_packed is true if the memory access is defined in a packed struct. */ Index: gcc/target.def =================================================================== --- gcc/target.def (revision 162841) +++ gcc/target.def (working copy) @@ -830,6 +830,13 @@ DEFHOOK bool, (tree vec_type, tree mask), hook_bool_tree_tree_true) +/* Implement hardware vector comparison or return NULL_TREE. */ +DEFHOOK +(builtin_vec_compare, + "", + tree, (gimple_stmt_iterator *gsi, tree type, tree v0, tree v1, enum tree_code code), + default_builtin_vec_compare) + /* Return true if the target supports misaligned store/load of a specific factor denoted in the third parameter. The last parameter is true if the access is defined in a packed struct. */ Index: gcc/tree.c =================================================================== --- gcc/tree.c (revision 162841) +++ gcc/tree.c (working copy) @@ -1360,6 +1360,28 @@ build_vector_from_ctor (tree type, VEC(c return build_vector (type, nreverse (list)); } +/* Build a vector of type VECTYPE where all the elements are SCs. 
*/ +tree +build_vector_from_val (const tree sc, const tree vectype) +{ + tree t = NULL_TREE; + int i, nunits = TYPE_VECTOR_SUBPARTS (vectype); + + if (sc == error_mark_node) + return sc; + + gcc_assert (TREE_TYPE (sc) == TREE_TYPE (vectype)); + + for (i = 0; i < nunits; ++i) + t = tree_cons (NULL_TREE, sc, t); + + if (CONSTANT_CLASS_P (sc)) + return build_vector (vectype, t); + else + return build_constructor_from_list (vectype, t); +} + + /* Return a new CONSTRUCTOR node whose type is TYPE and whose values are in the VEC pointed to by VALS. */ tree Index: gcc/tree.h =================================================================== --- gcc/tree.h (revision 162841) +++ gcc/tree.h (working copy) @@ -4029,6 +4029,7 @@ extern tree build_int_cst_type (tree, HO extern tree build_int_cst_wide (tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT); extern tree build_vector (tree, tree); extern tree build_vector_from_ctor (tree, VEC(constructor_elt,gc) *); +extern tree build_vector_from_val (const tree, const tree); extern tree build_constructor (tree, VEC(constructor_elt,gc) *); extern tree build_constructor_single (tree, tree, tree); extern tree build_constructor_from_list (tree, tree); Index: gcc/target.h =================================================================== --- gcc/target.h (revision 162841) +++ gcc/target.h (working copy) @@ -51,7 +51,7 @@ #include "tm.h" #include "insn-modes.h" - +#include "gimple.h" /* Types used by the record_gcc_switches() target function. 
*/ typedef enum { Index: gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c =================================================================== --- gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c (revision 0) +++ gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c (revision 0) @@ -0,0 +1,110 @@ +/* Each element of a vector comparison must be -1 if true, 0 if false. */ +#define vector(elcount, type) \ +__attribute__((vector_size((elcount)*sizeof(type)))) type + +#define vidx(type, vec, idx) (*(((type *) &(vec)) + (idx))) + +#define check_compare(type, count, res, i0, i1, op) \ +do { \ + int __i; \ + for (__i = 0; __i < count; __i ++) { \ + if (vidx (type, res, __i) != \ + ((vidx (type, i0, __i) op vidx (type, i1, __i)) ? (type)-1 : 0)) { \ + __builtin_printf ("%i != ((%i " #op " %i) ? -1 : 0) ", vidx (type, res, __i), \ + vidx (type, i0, __i), vidx (type, i1, __i)); \ + __builtin_abort (); \ + } \ + } \ +} while (0) + +#define test(type, count, v0, v1, res) \ +do { \ + res = (v0 > v1); \ + check_compare (type, count, res, v0, v1, >); \ + res = (v0 < v1); \ + check_compare (type, count, res, v0, v1, <); \ + res = (v0 >= v1); \ + check_compare (type, count, res, v0, v1, >=); \ + res = (v0 <= v1); \ + check_compare (type, count, res, v0, v1, <=); \ + res = (v0 == v1); \ + check_compare (type, count, res, v0, v1, ==); \ + res = (v0 != v1); \ + check_compare (type, count, res, v0, v1, !=); \ +} while (0) + + +int main (int argc, char *argv[]) { +#define INT int + vector (4, INT) i0; + vector (4, INT) i1; + vector (4, int) ires; + + i0 = (vector (4, INT)){argc, 1, 2, 10}; + i1 = (vector (4, INT)){0, 3, 2, (INT)-23}; + test (INT, 4, i0, i1, ires); +#undef INT + + +#define INT unsigned int + vector (4, int) ures; + vector (4, INT) u0; + vector (4, INT) u1; + + u0 = (vector (4, INT)){argc, 1, 2, 10}; + u1 = (vector (4, INT)){0, 3, 2, (INT)-23}; + test (INT, 4, u0, u1, ures); +#undef INT + + +#define SHORT short + vector (8, SHORT) s0; + vector (8, SHORT) s1; + vector (8, short) sres; + + s0 = (vector (8, SHORT)){argc, 1, 2, 10, 6, 87, 
(SHORT)-5, 2}; + s1 = (vector (8, SHORT)){0, 3, 2, (SHORT)-23, 12, 10, (SHORT)-2, 0}; + test (SHORT, 8, s0, s1, sres); +#undef SHORT + +#define SHORT unsigned short + vector (8, SHORT) us0; + vector (8, SHORT) us1; + vector (8, short) usres; + + us0 = (vector (8, SHORT)){argc, 1, 2, 10, 6, 87, (SHORT)-5, 2}; + us1 = (vector (8, SHORT)){0, 3, 2, (SHORT)-23, 12, 10, (SHORT)-2, 0}; + test (SHORT, 8, us0, us1, usres); +#undef SHORT + + +#define CHAR signed char + vector (16, CHAR) c0; + vector (16, CHAR) c1; + vector (16, signed char) cres; + + c0 = (vector (16, CHAR)){argc, 1, 2, 10, 6, 87, (CHAR)-5, 2, \ + argc, 1, 2, 10, 6, 87, (CHAR)-5, 2 }; + + c1 = (vector (16, CHAR)){0, 3, 2, (CHAR)-23, 12, 10, (CHAR)-2, 0, \ + 0, 3, 2, (CHAR)-23, 12, 10, (CHAR)-2, 0}; + test (CHAR, 16, c0, c1, cres); +#undef CHAR + +#define CHAR unsigned char + vector (16, CHAR) uc0; + vector (16, CHAR) uc1; + vector (16, signed char) ucres; + + uc0 = (vector (16, CHAR)){argc, 1, 2, 10, 6, 87, (CHAR)-5, 2, \ + argc, 1, 2, 10, 6, 87, (CHAR)-5, 2 }; + + uc1 = (vector (16, CHAR)){0, 3, 2, (CHAR)-23, 12, 10, (CHAR)-2, 0, \ + 0, 3, 2, (CHAR)-23, 12, 10, (CHAR)-2, 0}; + test (CHAR, 16, uc0, uc1, ucres); +#undef CHAR + + + return 0; +} + Index: gcc/c-typeck.c =================================================================== --- gcc/c-typeck.c (revision 162841) +++ gcc/c-typeck.c (working copy) @@ -9606,6 +9606,29 @@ build_binary_op (location_t location, en case EQ_EXPR: case NE_EXPR: + if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE) + { + tree intt; + if (TREE_TYPE (type0) != TREE_TYPE (type1)) + { + error_at (location, "comparing vectors with different " + "element types"); + return error_mark_node; + } + + if (TYPE_VECTOR_SUBPARTS (type0) != TYPE_VECTOR_SUBPARTS (type1)) + { + error_at (location, "comparing vectors with different " + "number of elements"); + return error_mark_node; + } + + /* Always construct signed integer vector type. 
*/ + intt = c_common_type_for_size (TYPE_PRECISION (TREE_TYPE (type0)),0); + result_type = build_vector_type (intt, TYPE_VECTOR_SUBPARTS (type0)); + converted = 1; + break; + } if (FLOAT_TYPE_P (type0) || FLOAT_TYPE_P (type1)) warning_at (location, OPT_Wfloat_equal, @@ -9718,6 +9741,29 @@ build_binary_op (location_t location, en case GE_EXPR: case LT_EXPR: case GT_EXPR: + if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE) + { + tree intt; + if (TREE_TYPE (type0) != TREE_TYPE (type1)) + { + error_at (location, "comparing vectors with different " + "element types"); + return error_mark_node; + } + + if (TYPE_VECTOR_SUBPARTS (type0) != TYPE_VECTOR_SUBPARTS (type1)) + { + error_at (location, "comparing vectors with different " + "number of elements"); + return error_mark_node; + } + + /* Always construct signed integer vector type. */ + intt = c_common_type_for_size (TYPE_PRECISION (TREE_TYPE (type0)),0); + result_type = build_vector_type (intt, TYPE_VECTOR_SUBPARTS (type0)); + converted = 1; + break; + } build_type = integer_type_node; if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE || code0 == FIXED_POINT_TYPE) @@ -10113,6 +10159,10 @@ c_objc_common_truthvalue_conversion (loc case FUNCTION_TYPE: gcc_unreachable (); + case VECTOR_TYPE: + error_at (location, "used vector type where scalar is required"); + return error_mark_node; + default: break; } Index: gcc/tree-vect-generic.c =================================================================== --- gcc/tree-vect-generic.c (revision 162841) +++ gcc/tree-vect-generic.c (working copy) @@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. #include "tree-pass.h" #include "flags.h" #include "ggc.h" +#include "target.h" /* Need to include rtl.h, expr.h, etc. for optabs. */ #include "expr.h" @@ -125,6 +126,21 @@ do_binop (gimple_stmt_iterator *gsi, tre return gimplify_build2 (gsi, code, inner_type, a, b); } + +/* Construct expression (A[BITPOS] code B[BITPOS]) ? 
-1 : 0; INNER_TYPE is the element type of the result vector; A and B are extracted in their own element types. */ +static tree +do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, + tree bitpos, tree bitsize, enum tree_code code) +{ + tree cond; + a = tree_vec_extract (gsi, TREE_TYPE (TREE_TYPE (a)), a, bitsize, bitpos); + b = tree_vec_extract (gsi, TREE_TYPE (TREE_TYPE (b)), b, bitsize, bitpos); + cond = gimplify_build2 (gsi, code, boolean_type_node, a, b); + return gimplify_build3 (gsi, COND_EXPR, inner_type, cond, + build_int_cst (inner_type, -1), + build_int_cst (inner_type, 0)); +} + /* Expand vector addition to scalars. This does bit twiddling in order to increase parallelism: @@ -284,6 +300,21 @@ expand_vector_addition (gimple_stmt_iter a, b, code); } +/* Try a hardware hook for vector comparison or + extract comparison piecewise. */ +static tree +expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0, + tree op1, enum tree_code code) +{ + tree t = targetm.vectorize.builtin_vec_compare (gsi, type, op0, op1, code); + + /* The pieces are built in the element type of the result vector TYPE. */ + if (t == NULL_TREE) + t = expand_vector_piecewise (gsi, do_compare, type, + TREE_TYPE (type), op0, op1, code); + return t; +} + static tree expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type, gimple assign, enum tree_code code) @@ -326,8 +357,24 @@ expand_vector_operation (gimple_stmt_ite case BIT_NOT_EXPR: return expand_vector_parallel (gsi, do_unop, type, gimple_assign_rhs1 (assign), - NULL_TREE, code); - + NULL_TREE, code); + case EQ_EXPR: + case NE_EXPR: + case GT_EXPR: + case LT_EXPR: + case GE_EXPR: + case LE_EXPR: + case UNEQ_EXPR: + case UNGT_EXPR: + case UNLT_EXPR: + case UNGE_EXPR: + case UNLE_EXPR: + case LTGT_EXPR: + case ORDERED_EXPR: + case UNORDERED_EXPR: + return expand_vector_comparison (gsi, type, + gimple_assign_rhs1 (assign), + gimple_assign_rhs2 (assign), code); default: break; } Index: gcc/tree-cfg.c =================================================================== --- gcc/tree-cfg.c (revision 162841) +++ gcc/tree-cfg.c (working copy) @@ -30,6 +30,7 @@ along with GCC; see the 
file COPYING3. #include "flags.h" #include "function.h" #include "ggc.h" +#include "c-lang.h" #include "langhooks.h" #include "tree-pretty-print.h" #include "gimple-pretty-print.h" @@ -3144,6 +3145,39 @@ verify_gimple_comparison (tree type, tre return true; } + if (TREE_CODE (op0_type) == VECTOR_TYPE + && TREE_CODE (op1_type) == VECTOR_TYPE + && TREE_CODE (type) == VECTOR_TYPE) + { + /* Operands must agree with each other, and the result with them. */ + if (TYPE_VECTOR_SUBPARTS (op0_type) != TYPE_VECTOR_SUBPARTS (op1_type)) + { + error ("invalid vector comparison, number of elements do not match"); + debug_generic_expr (op0_type); + debug_generic_expr (op1_type); + return true; + } + + if (TREE_TYPE (op0_type) != TREE_TYPE (op1_type)) + { + error ("invalid vector comparison, vector element type mismatch"); + debug_generic_expr (op0_type); + debug_generic_expr (op1_type); + return true; + } + + if (TYPE_VECTOR_SUBPARTS (type) != TYPE_VECTOR_SUBPARTS (op0_type) + || TYPE_PRECISION (TREE_TYPE (op0_type)) + != TYPE_PRECISION (TREE_TYPE (type))) + { + error ("invalid vector comparison resulting type"); + debug_generic_expr (type); + return true; + } + + return false; + } + /* For comparisons we do not have the operations type as the effective type the comparison is carried out in. Instead we require that either the first operand is trivially Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 162841) +++ gcc/config/i386/i386.c (working copy) @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. #include "tm.h" #include "rtl.h" #include "tree.h" +#include "tree-flow.h" #include "tm_p.h" #include "regs.h" #include "hard-reg-set.h" @@ -30050,6 +30051,276 @@ ix86_vectorize_builtin_vec_perm (tree ve return ix86_builtins[(int) fcode]; } +/* Find target specific sequence for vector comparison of + real-type vectors V0 and V1. Returns variable containing + result of the comparison or NULL_TREE in other case. 
*/ +static tree +vector_fp_compare (gimple_stmt_iterator *gsi, tree rettype, + enum machine_mode mode, tree v0, tree v1, + enum tree_code code) +{ + enum ix86_builtins fcode; + int arg = -1; + tree fdef, frtype, tmp, var, t; + gimple new_stmt; + bool reverse = false; + +#define SWITCH_MODE(mode, fcode, code, value) \ +switch (mode) \ + { \ + case V2DFmode: \ + if (!TARGET_SSE2) return NULL_TREE; \ + fcode = IX86_BUILTIN_CMP ## code ## PD; \ + break; \ + case V4DFmode: \ + if (!TARGET_AVX) return NULL_TREE; \ + fcode = IX86_BUILTIN_CMPPD256; \ + arg = value; \ + break; \ + case V4SFmode: \ + if (!TARGET_SSE) return NULL_TREE; \ + fcode = IX86_BUILTIN_CMP ## code ## PS; \ + break; \ + case V8SFmode: \ + if (!TARGET_AVX) return NULL_TREE; \ + fcode = IX86_BUILTIN_CMPPS256; \ + arg = value; \ + break; \ + default: \ + return NULL_TREE; \ + /* FIXME: Similar instructions for MMX. */ \ + } + + switch (code) + { + case EQ_EXPR: + SWITCH_MODE (mode, fcode, EQ, 0); + break; + + case NE_EXPR: + SWITCH_MODE (mode, fcode, NEQ, 4); + break; + + case GT_EXPR: + SWITCH_MODE (mode, fcode, LT, 1); + reverse = true; /* V0 > V1 is emitted as V1 < V0. */ + break; + + case LT_EXPR: + SWITCH_MODE (mode, fcode, LT, 1); + break; + + case LE_EXPR: + SWITCH_MODE (mode, fcode, LE, 2); + break; + + case GE_EXPR: + SWITCH_MODE (mode, fcode, LE, 2); + reverse = true; /* V0 >= V1 is emitted as V1 <= V0. */ + break; + + default: + return NULL_TREE; + } +#undef SWITCH_MODE + + fdef = ix86_builtins[(int)fcode]; + frtype = TREE_TYPE (TREE_TYPE (fdef)); + + tmp = create_tmp_var (frtype, "tmp"); + var = create_tmp_var (rettype, "tmp"); + + if (arg == -1) + if (reverse) + new_stmt = gimple_build_call (fdef, 2, v1, v0); + else + new_stmt = gimple_build_call (fdef, 2, v0, v1); + else + if (reverse) + new_stmt = gimple_build_call (fdef, 3, v1, v0, + build_int_cst (char_type_node, arg)); + else + new_stmt = gimple_build_call (fdef, 3, v0, v1, + build_int_cst (char_type_node, arg)); + + gimple_call_set_lhs (new_stmt, tmp); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + t = 
gimplify_build1 (gsi, VIEW_CONVERT_EXPR, rettype, tmp); + new_stmt = gimple_build_assign (var, t); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + + return var; +} + +/* Find target specific sequence for vector comparison of + integer-type vectors V0 and V1. Returns variable containing + result of the comparison or NULL_TREE in other case. */ +static tree +vector_int_compare (gimple_stmt_iterator *gsi, tree rettype, + enum machine_mode mode, tree v0, tree v1, + enum tree_code code) +{ + enum ix86_builtins feq, fgt; + tree var, t, tmp, tmp1, tmp2, defeq, defgt, gtrtype, eqrtype; + gimple new_stmt; + + switch (mode) + { + /* SSE integer-type vectors. */ + case V2DImode: + if (!TARGET_SSE4_2) return NULL_TREE; + feq = IX86_BUILTIN_PCMPEQQ; + fgt = IX86_BUILTIN_PCMPGTQ; + break; + + case V4SImode: + if (!TARGET_SSE2) return NULL_TREE; + feq = IX86_BUILTIN_PCMPEQD128; + fgt = IX86_BUILTIN_PCMPGTD128; + break; + + case V8HImode: + if (!TARGET_SSE2) return NULL_TREE; + feq = IX86_BUILTIN_PCMPEQW128; + fgt = IX86_BUILTIN_PCMPGTW128; + break; + + case V16QImode: + if (!TARGET_SSE2) return NULL_TREE; + feq = IX86_BUILTIN_PCMPEQB128; + fgt = IX86_BUILTIN_PCMPGTB128; + break; + + /* MMX integer-type vectors. */ + case V2SImode: + if (!TARGET_MMX) return NULL_TREE; + feq = IX86_BUILTIN_PCMPEQD; + fgt = IX86_BUILTIN_PCMPGTD; + break; + + case V4HImode: + if (!TARGET_MMX) return NULL_TREE; + feq = IX86_BUILTIN_PCMPEQW; + fgt = IX86_BUILTIN_PCMPGTW; + break; + + case V8QImode: + if (!TARGET_MMX) return NULL_TREE; + feq = IX86_BUILTIN_PCMPEQB; + fgt = IX86_BUILTIN_PCMPGTB; + break; + + /* FIXME: Similar instructions for AVX. 
*/ + default: + return NULL_TREE; + } + + + var = create_tmp_var (rettype, "ret"); + defeq = ix86_builtins[(int)feq]; + defgt = ix86_builtins[(int)fgt]; + eqrtype = TREE_TYPE (TREE_TYPE (defeq)); + gtrtype = TREE_TYPE (TREE_TYPE (defgt)); + +#define EQGT_CALL(gsi, stmt, var, op0, op1, gteq) \ +do { \ + var = create_tmp_var (gteq ## rtype, "tmp"); \ + stmt = gimple_build_call (def ## gteq, 2, op0, op1); \ + gimple_call_set_lhs (stmt, var); \ + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); \ +} while (0) + + switch (code) + { + case EQ_EXPR: + EQGT_CALL (gsi, new_stmt, tmp, v0, v1, eq); + break; + + case NE_EXPR: + tmp = create_tmp_var (eqrtype, "tmp"); + + EQGT_CALL (gsi, new_stmt, tmp1, v0, v1, eq); + EQGT_CALL (gsi, new_stmt, tmp2, v0, v0, eq); + + /* t = tmp1 ^ {-1, -1,...} */ + t = gimplify_build2 (gsi, BIT_XOR_EXPR, eqrtype, tmp1, tmp2); + new_stmt = gimple_build_assign (tmp, t); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + break; + + case GT_EXPR: + EQGT_CALL (gsi, new_stmt, tmp, v0, v1, gt); + break; + + case LT_EXPR: + EQGT_CALL (gsi, new_stmt, tmp, v1, v0, gt); + break; + + case GE_EXPR: + if (eqrtype != gtrtype) + return NULL_TREE; + tmp = create_tmp_var (eqrtype, "tmp"); + EQGT_CALL (gsi, new_stmt, tmp1, v0, v1, gt); + EQGT_CALL (gsi, new_stmt, tmp2, v0, v1, eq); + t = gimplify_build2 (gsi, BIT_IOR_EXPR, eqrtype, tmp1, tmp2); + new_stmt = gimple_build_assign (tmp, t); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + break; + + case LE_EXPR: + if (eqrtype != gtrtype) + return NULL_TREE; + tmp = create_tmp_var (eqrtype, "tmp"); + EQGT_CALL (gsi, new_stmt, tmp1, v1, v0, gt); + EQGT_CALL (gsi, new_stmt, tmp2, v0, v1, eq); + t = gimplify_build2 (gsi, BIT_IOR_EXPR, eqrtype, tmp1, tmp2); + new_stmt = gimple_build_assign (tmp, t); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + break; + + default: + return NULL_TREE; + } +#undef EQGT_CALL + + t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, rettype, tmp); + new_stmt = gimple_build_assign (var, t); 
+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + return var; +} + +/* Lower a comparison of two vectors V0 and V1, returning a + variable with the result of comparison. Returns NULL_TREE + when it is impossible to find a target specific sequence. */ +static tree +ix86_vectorize_builtin_vec_compare (gimple_stmt_iterator *gsi, tree rettype, + tree v0, tree v1, enum tree_code code) +{ + tree type; + + /* Make sure we are comparing the same types. */ + if (TREE_TYPE (v0) != TREE_TYPE (v1) + || TREE_TYPE (TREE_TYPE (v0)) != TREE_TYPE (TREE_TYPE (v1))) + return NULL_TREE; + + type = TREE_TYPE (v0); + + /* Cannot compare packed unsigned integers + unless it is EQ or NEQ operations. */ + if (TREE_CODE (TREE_TYPE (type)) == INTEGER_TYPE + && TYPE_UNSIGNED (TREE_TYPE (type))) + if (code != EQ_EXPR && code != NE_EXPR) + return NULL_TREE; + + + if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) + return vector_fp_compare (gsi, rettype, TYPE_MODE (type), v0, v1, code); + else if (TREE_CODE (TREE_TYPE (type)) == INTEGER_TYPE) + return vector_int_compare (gsi, rettype, TYPE_MODE (type), v0, v1, code); + else + return NULL_TREE; +} + /* Return a vector mode with twice as many elements as VMODE. */ /* ??? Consider moving this to a table generated by genmodes.c. */ @@ -31541,6 +31812,11 @@ ix86_enum_va_list (int idx, const char * #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \ ix86_vectorize_builtin_vec_perm_ok +#undef TARGET_VECTORIZE_BUILTIN_VEC_COMPARE +#define TARGET_VECTORIZE_BUILTIN_VEC_COMPARE \ + ix86_vectorize_builtin_vec_compare + + #undef TARGET_SET_CURRENT_FUNCTION #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function