From patchwork Tue Aug 18 10:20:43 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ilya Enkovich X-Patchwork-Id: 508247 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 2FB851402B5 for ; Tue, 18 Aug 2015 20:25:21 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=h5vvPlSB; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; q=dns; s= default; b=K7RCgxJdpEkjGRb7lB9lDfbVC/uo9sEZBbpPtDZOnW5rc1mB7lZAh SlyByJDavubsyoMF+vt5PKGP8Sv6ttBNWK7bacvchhdEBDRBnvhxV+KADTa6neEB TRKcMXfgTlRqyiM6XC4HwjWW6e8zrgm139+/SET3YDnAXgzlTKhQys= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; s= default; bh=Oe83OyJ09zu8wdXw6TVkCSAFpOU=; b=h5vvPlSBwLrAaA9N/4DY zMMTJq38eH+DLTUllzzKbKh8gdJTUnsK6RgfU6T+P7yObPUdeTT9LM/pdtI5gpOh Nnvg17shQow2l98zUQaxGcEdkUSep9t2dGnCGuBI5RrqI5HcHw8/q7R4IKtj6QQO 5Av/HghBhiw2RS/mGmDUftg= Received: (qmail 89586 invoked by alias); 18 Aug 2015 10:25:14 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 89554 invoked by uid 89); 18 Aug 2015 10:25:10 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.6 
required=5.0 tests=AWL, BAYES_00, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-ig0-f172.google.com Received: from mail-ig0-f172.google.com (HELO mail-ig0-f172.google.com) (209.85.213.172) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Tue, 18 Aug 2015 10:25:01 +0000 Received: by igui7 with SMTP id i7so76165941igu.0 for ; Tue, 18 Aug 2015 03:24:58 -0700 (PDT) X-Received: by 10.50.57.52 with SMTP id f20mr23725605igq.54.1439893498401; Tue, 18 Aug 2015 03:24:58 -0700 (PDT) Received: from msticlxl57.ims.intel.com ([192.55.54.40]) by smtp.gmail.com with ESMTPSA id y94sm14056025ioi.16.2015.08.18.03.24.56 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 18 Aug 2015 03:24:58 -0700 (PDT) Date: Tue, 18 Aug 2015 13:20:43 +0300 From: Ilya Enkovich To: gcc-patches@gcc.gnu.org Subject: [Scalar masks 6/x] Vectorize bool statements Message-ID: <20150818102043.GF12565@msticlxl57.ims.intel.com> MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.23 (2014-03-12) X-IsSubscribed: yes Hi, This patch adds vectorization for the statements that the modified bool patterns can now produce. A comparison is transformed into a vector comparison followed by GEN_MASK_EXPR. Other boolean operations are simply performed on the integer (scalar mask) type. Thanks, Ilya --- 2015-08-17 Ilya Enkovich * tree-vectorizer.h (vect_var_kind): Add vect_mask_var. (stmt_vec_info_type): Add bool_op_vec_info_type and comparison_vec_info_type. * tree-vect-data-refs.c (vect_get_new_vect_var): Support vect_mask_var. (vect_create_destination_var): Likewise. * tree-vect-stmts.c (vectorizable_bool_op): New. (vectorizable_comparison): New. (vect_analyze_stmt): Add vectorizable_bool_op and vectorizable_comparison. (vect_transform_stmt): Support comparison_vec_info_type and bool_op_vec_info_type. 
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index f1eaef4..a6a6311 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -3849,6 +3849,9 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) case vect_scalar_var: prefix = "stmp"; break; + case vect_mask_var: + prefix = "mask"; + break; case vect_pointer_var: prefix = "vectp"; break; @@ -4403,7 +4406,11 @@ vect_create_destination_var (tree scalar_dest, tree vectype) tree type; enum vect_var_kind kind; - kind = vectype ? vect_simple_var : vect_scalar_var; + kind = vectype + ? TREE_CODE (vectype) == VECTOR_TYPE + ? vect_simple_var + : vect_mask_var + : vect_scalar_var; type = vectype ? vectype : TREE_TYPE (scalar_dest); gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index d0e0833..301b9e1 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -7454,6 +7454,478 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, return true; } +/* vectorizable_bool_op. + + Check if STMT is a logical expression on boolean operands which is vectorized into + a scalar mask manipulation statement. If VEC_STMT is also passed, vectorize STMT: create the mask statement, put it in VEC_STMT and insert it at GSI. An EQ_EXPR of two non-constant operands is not handled. + + Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ + +bool +vectorizable_bool_op (gimple stmt, gimple_stmt_iterator *gsi, + gimple *vec_stmt, tree reduc_def, + slp_tree slp_node) +{ + tree lhs, rhs1, rhs2; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype1, vectype2; + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE; + tree new_temp; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + tree def; + enum vect_def_type dt, dts[4]; + int nunits; + int ncopies; + enum tree_code code; + stmt_vec_info prev_stmt_info = NULL; + int i, j; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + vec<tree> vec_oprnds0 = vNULL; + vec<tree> vec_oprnds1 = vNULL; + tree mask; + + if (VECTOR_TYPE_P (vectype)) + return false; + /* The precision of the non-vector (mask) type is used as the number of elements. */ + nunits = TYPE_PRECISION (vectype); + + if (slp_node || PURE_SLP_STMT (stmt_info)) + ncopies = 1; + else + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + + gcc_assert (ncopies >= 1); + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def + && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle + && reduc_def)) + return false; + + if (STMT_VINFO_LIVE_P (stmt_info)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "value used after loop.\n"); + return false; + } + + if (!is_gimple_assign (stmt)) + return false; + + code = gimple_assign_rhs_code (stmt); + + if (code != BIT_AND_EXPR + && code != BIT_IOR_EXPR + && code != BIT_XOR_EXPR + && code != BIT_NOT_EXPR + && code != NE_EXPR + && code != EQ_EXPR + && code != SSA_NAME) + return false; + + lhs = gimple_assign_lhs (stmt); + rhs1 = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + + if (TREE_TYPE (lhs) != boolean_type_node + || TREE_TYPE (rhs1) != boolean_type_node + || (rhs2 && TREE_TYPE (rhs2) != boolean_type_node)) + return false; + + if (TREE_CODE (rhs1) == SSA_NAME) + { + gimple rhs1_def_stmt = SSA_NAME_DEF_STMT (rhs1); + if 
(!vect_is_simple_use_1 (rhs1, stmt, loop_vinfo, bb_vinfo, + &rhs1_def_stmt, &def, &dt, &vectype1)) + return false; + } + else + return false; + + if (rhs2 && TREE_CODE (rhs2) == SSA_NAME) + { + gimple rhs2_def_stmt = SSA_NAME_DEF_STMT (rhs2); + if (!vect_is_simple_use_1 (rhs2, stmt, loop_vinfo, bb_vinfo, + &rhs2_def_stmt, &def, &dt, &vectype2)) + return false; + } + /* Constant may appear in binary operation but we actually + shouldn't need it. We don't expect result to be a constant + here and return false on it because constant mask + creation is not supported (and shouldn't be needed). */ + else if (rhs2 && CONSTANT_CLASS_P (rhs2)) + { + if (integer_zerop (rhs2)) + { + switch (code) + { + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case NE_EXPR: + code = SSA_NAME; + break; + case EQ_EXPR: + code = BIT_NOT_EXPR; + break; + default: + return false; + } + } + else + { + gcc_assert (integer_onep (rhs2)); + + switch (code) + { + case BIT_AND_EXPR: + case EQ_EXPR: + code = SSA_NAME; + break; + case NE_EXPR: + case BIT_XOR_EXPR: + code = BIT_NOT_EXPR; + break; + default: + return false; + } + } + rhs2 = NULL; + } + else if (rhs2) + return false; + + /* Use bit operators for comparison. NE of two masks is XOR. EQ of two masks is XNOR, which is not a single bit operation (and is not BIT_AND_EXPR), so it is rejected during analysis. */ + if (code == NE_EXPR) + code = BIT_XOR_EXPR; + else if (code == EQ_EXPR) + return false; + + if (!vec_stmt) + { + STMT_VINFO_TYPE (stmt_info) = bool_op_vec_info_type; + return true; + } + + /* Transform. */ + if (!slp_node) + { + vec_oprnds0.create (1); + vec_oprnds1.create (1); + } + + mask = vect_create_destination_var (lhs, vectype); + + /* Generate NCOPIES mask statements. 
*/ + for (j = 0; j < ncopies; j++) + { + gassign *new_stmt = NULL; + if (j == 0) + { + if (slp_node) + { + auto_vec<tree, 2> ops; + auto_vec<vec<tree>, 2> vec_defs; + + ops.safe_push (rhs1); + if (rhs2) + ops.safe_push (rhs2); + vect_get_slp_defs (ops, slp_node, &vec_defs, -1); + if (rhs2) + vec_oprnds1 = vec_defs.pop (); + vec_oprnds0 = vec_defs.pop (); + + ops.release (); + vec_defs.release (); + } + else + { + gimple gtemp; + vec_rhs1 + = vect_get_vec_def_for_operand (rhs1, stmt, NULL); + vect_is_simple_use (rhs1, stmt, loop_vinfo, NULL, + &gtemp, &def, &dts[0]); + if (rhs2) + { + vec_rhs2 = + vect_get_vec_def_for_operand (rhs2, stmt, NULL); + vect_is_simple_use (rhs2, stmt, loop_vinfo, NULL, + &gtemp, &def, &dts[1]); + } + } + } + else + { + vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0], + vec_oprnds0.pop ()); + if (rhs2) + vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1], + vec_oprnds1.pop ()); + } + + if (!slp_node) + { + vec_oprnds0.quick_push (vec_rhs1); + if (rhs2) + vec_oprnds1.quick_push (vec_rhs2); + } + + /* Arguments are ready. Create the new vector stmt. */ + FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1) + { + if (rhs2) + new_stmt = gimple_build_assign (mask, code, vec_rhs1, + vec_oprnds1[i]); + else + new_stmt = gimple_build_assign (mask, code, vec_rhs1); + + new_temp = make_ssa_name (mask, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + } + + if (slp_node) + continue; + + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + + vec_oprnds0.release (); + vec_oprnds1.release (); + + return true; +} + +/* vectorizable_comparison. + + Check if STMT is comparison expression that can be vectorized. 
+ If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt using GEN_MASK_EXPR to replace it, put it in VEC_STMT, and insert it + at GSI. + + Return FALSE if not a vectorizable STMT, TRUE otherwise. */ + +bool +vectorizable_comparison (gimple stmt, gimple_stmt_iterator *gsi, + gimple *vec_stmt, tree reduc_def, + slp_tree slp_node) +{ + tree lhs, rhs1, rhs2; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE; + tree vec_compare; + tree new_temp; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + tree def; + enum vect_def_type dt, dts[4]; + int nunits; + int ncopies; + enum tree_code code; + stmt_vec_info prev_stmt_info = NULL; + int i, j; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + vec<tree> vec_oprnds0 = vNULL; + vec<tree> vec_oprnds1 = vNULL; + tree vec_cmp_type; + tree mask_type; + tree mask; + tree cmp_res; + + if (VECTOR_TYPE_P (vectype)) + return false; + + mask_type = vectype; + nunits = TYPE_PRECISION (vectype); + + if (slp_node || PURE_SLP_STMT (stmt_info)) + ncopies = 1; + else + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + + gcc_assert (ncopies >= 1); + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def + && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle + && reduc_def)) + return false; + + if (STMT_VINFO_LIVE_P (stmt_info)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "value used after loop.\n"); + return false; + } + + if (!is_gimple_assign (stmt)) + return false; + + code = gimple_assign_rhs_code (stmt); + + if (code != LT_EXPR + && code != LE_EXPR + && code != GT_EXPR + && code != GE_EXPR + && code != EQ_EXPR + && code != NE_EXPR) + return false; + + rhs1 = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + + if (TREE_CODE (rhs1) == 
SSA_NAME) + { + gimple rhs1_def_stmt = SSA_NAME_DEF_STMT (rhs1); + if (!vect_is_simple_use_1 (rhs1, stmt, loop_vinfo, bb_vinfo, + &rhs1_def_stmt, &def, &dt, &vectype1)) + return false; + } + else if (TREE_CODE (rhs1) != INTEGER_CST && TREE_CODE (rhs1) != REAL_CST + && TREE_CODE (rhs1) != FIXED_CST) + return false; + + if (TREE_CODE (rhs2) == SSA_NAME) + { + gimple rhs2_def_stmt = SSA_NAME_DEF_STMT (rhs2); + if (!vect_is_simple_use_1 (rhs2, stmt, loop_vinfo, bb_vinfo, + &rhs2_def_stmt, &def, &dt, &vectype2)) + return false; + } + else if (TREE_CODE (rhs2) != INTEGER_CST && TREE_CODE (rhs2) != REAL_CST + && TREE_CODE (rhs2) != FIXED_CST) + return false; + /* A constant operand leaves its vectype NULL_TREE; use whichever operand supplied one. */ + vectype = vectype1 ? vectype1 : vectype2; + + if (!vectype || !VECTOR_TYPE_P (vectype)) + return false; + + if (nunits != TYPE_VECTOR_SUBPARTS (vectype)) + return false; + + unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))); + /* The result of a vector comparison should be signed type. */ + tree cmp_type = build_nonstandard_integer_type (prec, 0); + vec_cmp_type = get_same_sized_vectype (cmp_type, vectype); + if (vec_cmp_type == NULL_TREE) + return false; + + if (!vec_stmt) + { + STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; + return expand_gen_mask_expr_p (vectype); + } + + /* Transform. */ + if (!slp_node) + { + vec_oprnds0.create (1); + vec_oprnds1.create (1); + } + + /* Handle def. */ + lhs = gimple_assign_lhs (stmt); + mask = vect_create_destination_var (lhs, mask_type); + cmp_res = vect_get_new_vect_var (vec_cmp_type, vect_simple_var, NULL); + + /* Handle cmp expr. 
*/ + for (j = 0; j < ncopies; j++) + { + gassign *new_stmt = NULL; + if (j == 0) + { + if (slp_node) + { + auto_vec<tree, 2> ops; + auto_vec<vec<tree>, 2> vec_defs; + + ops.safe_push (rhs1); + ops.safe_push (rhs2); + vect_get_slp_defs (ops, slp_node, &vec_defs, -1); + vec_oprnds1 = vec_defs.pop (); + vec_oprnds0 = vec_defs.pop (); + + ops.release (); + vec_defs.release (); + } + else + { + gimple gtemp; + vec_rhs1 + = vect_get_vec_def_for_operand (rhs1, stmt, NULL); + vect_is_simple_use (rhs1, stmt, loop_vinfo, NULL, + &gtemp, &def, &dts[0]); + vec_rhs2 = + vect_get_vec_def_for_operand (rhs2, stmt, NULL); + vect_is_simple_use (rhs2, stmt, loop_vinfo, NULL, + &gtemp, &def, &dts[1]); + } + } + else + { + vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0], + vec_oprnds0.pop ()); + vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1], + vec_oprnds1.pop ()); + } + + if (!slp_node) + { + vec_oprnds0.quick_push (vec_rhs1); + vec_oprnds1.quick_push (vec_rhs2); + } + + /* Arguments are ready. Create the new vector stmt. */ + FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1) + { + vec_rhs2 = vec_oprnds1[i]; + + vec_compare = build2 (code, vec_cmp_type, vec_rhs1, vec_rhs2); + new_stmt = gimple_build_assign (cmp_res, vec_compare); + new_temp = make_ssa_name (cmp_res, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + /* The intermediate vector compare is not recorded in SLP_TREE_VEC_STMTS; + only the GEN_MASK_EXPR statement built below is. */ + + vec_compare = build1 (GEN_MASK_EXPR, mask_type, new_temp); + new_stmt = gimple_build_assign (mask, vec_compare); + new_temp = make_ssa_name (mask, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + } + + if (slp_node) + continue; + + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + + 
vec_oprnds0.release (); + vec_oprnds1.release (); + + return true; +} /* Make sure the statement is vectorizable. */ @@ -7657,7 +8129,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) || vectorizable_call (stmt, NULL, NULL, node) || vectorizable_store (stmt, NULL, NULL, node) || vectorizable_reduction (stmt, NULL, NULL, node) - || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); + || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node) + || vectorizable_comparison (stmt, NULL, NULL, NULL, node) + || vectorizable_bool_op (stmt, NULL, NULL, NULL, node)); else { if (bb_vinfo) @@ -7669,7 +8143,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) || vectorizable_load (stmt, NULL, NULL, node, NULL) || vectorizable_call (stmt, NULL, NULL, node) || vectorizable_store (stmt, NULL, NULL, node) - || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); + || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node) + || vectorizable_comparison (stmt, NULL, NULL, NULL, node) + || vectorizable_bool_op (stmt, NULL, NULL, NULL, node)); } if (!ok) @@ -7780,6 +8256,16 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi, is_store = true; break; + case comparison_vec_info_type: + done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node); + gcc_assert (done); + break; + + case bool_op_vec_info_type: + done = vectorizable_bool_op (stmt, gsi, &vec_stmt, NULL, slp_node); + gcc_assert (done); + break; + case condition_vec_info_type: done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node); gcc_assert (done); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 4b98378..501dd21 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -28,7 +28,8 @@ along with GCC; see the file COPYING3. If not see enum vect_var_kind { vect_simple_var, vect_pointer_var, - vect_scalar_var + vect_scalar_var, + vect_mask_var }; /* Defines type of operation. 
*/ @@ -478,9 +479,11 @@ enum stmt_vec_info_type { store_vec_info_type, shift_vec_info_type, op_vec_info_type, + bool_op_vec_info_type, call_vec_info_type, call_simd_clone_vec_info_type, assignment_vec_info_type, + comparison_vec_info_type, condition_vec_info_type, reduc_vec_info_type, induc_vec_info_type,