From patchwork Thu Dec 18 10:46:00 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Thomas Schwinge X-Patchwork-Id: 422540 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id A21E81400EA for ; Thu, 18 Dec 2014 21:46:20 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:in-reply-to:references:date:message-id :mime-version:content-type; q=dns; s=default; b=RUVwgqtLJStsLFU7 C46ys7s3zaE8ULY17f0iN352tA9HWqn+4ZKK4LS7Hl6HZDW6Mb4K7urELz42I1I3 iHBNZbBmKzxRHUSqHurlomJIjLv3/bSjKgkxNg4DfmJeaFTrLWNkQhO8CHrGtpZP gSd9KtzGkL5DjB6a14Lf+pD6SBo= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:in-reply-to:references:date:message-id :mime-version:content-type; s=default; bh=wUUbTkiVwaNoMnmmtmhdo/ huwAM=; b=auYcs4XSJqG4PXYOueqMnLqilNFDiKL/YVFBrB+96EuIH6RH31bKo2 f14aJenUcxBCAHoxeXgu1mPbrGHn7btugl9pLRcAM9gDgzCzelpHvFBzZOSG+35Z AN2lIan1H+3mTsFs3s4UywDsGgwdXIz5PaQ32Bqo/J8MznKrcEJSk= Received: (qmail 30165 invoked by alias); 18 Dec 2014 10:46:13 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 30155 invoked by uid 89); 18 Dec 2014 10:46:12 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.9 required=5.0 tests=AWL, BAYES_00, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Thu, 18 Dec 2014 10:46:10 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=SVR-IES-FEM-01.mgc.mentorg.com) by relay1.mentorg.com with esmtp id 1Y1Yar-0005R7-Dp from Thomas_Schwinge@mentor.com ; Thu, 18 Dec 2014 02:46:05 -0800 Received: from feldtkeller.schwinge.homeip.net (137.202.0.76) by SVR-IES-FEM-01.mgc.mentorg.com (137.202.0.104) with Microsoft SMTP Server id 14.3.181.6; Thu, 18 Dec 2014 10:46:03 +0000 From: Thomas Schwinge To: Jakub Jelinek CC: Subject: Re: OpenACC middle end changes In-Reply-To: <20141113180949.GX5026@tucnak.redhat.com> References: <87fvdnnijk.fsf@schwinge.name> <20141113180949.GX5026@tucnak.redhat.com> User-Agent: Notmuch/0.9-101-g81dad07 (http://notmuchmail.org) Emacs/24.3.1 (x86_64-pc-linux-gnu) Date: Thu, 18 Dec 2014 11:46:00 +0100 Message-ID: <87tx0ts0bb.fsf@schwinge.name> MIME-Version: 1.0 Hi! On Thu, 13 Nov 2014 19:09:49 +0100, Jakub Jelinek wrote: > On Thu, Nov 13, 2014 at 05:59:11PM +0100, Thomas Schwinge wrote: > > --- gcc/builtins.c > > +++ gcc/builtins.c > > +/* Expand OpenACC acc_on_device. > > + > > + This has to happen late (that is, not in early folding; expand_builtin_*, > > + rather than fold_builtin_*), as we have to act differently for host and > > + acceleration device (ACCEL_COMPILER conditional). */ > > + > > +static rtx > > +expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED) > > +{ > > + if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE)) > > + return NULL_RTX; > > + > > + tree arg, v1, v2, ret; > > + location_t loc; > > + > > + arg = CALL_EXPR_ARG (exp, 0); > > + arg = builtin_save_expr (arg); > > + loc = EXPR_LOCATION (exp); > > + > > + /* Build: (arg == v1 || arg == v2) ? 1 : 0. */ > > + > > +#ifdef ACCEL_COMPILER > > + v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_not_host */ 3); > > + v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device); > > +#else > > + v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_none */ 0); > > + v2 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_host */ 2); > > +#endif > > + > > + v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1); > > + v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2); > > + > > + /* Can't use TRUTH_ORIF_EXPR, as that is not supported by > > + expand_expr_real*. */ > > + ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2); > > + ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, > > + ret, integer_one_node, integer_zero_node); > > + > > + return expand_normal (ret); > > If you can't fold it late (which is indeed a problem for -O0), > then I'd suggest to implement this more RTL-ish. > So, avoid the builtin_save_expr, instead > rtx op = expand_normal (arg); > Don't build v1/v2 as trees (and, please fix the TODOs), but rtxes, (acc_device_* TODOs already resolved earlier on.) > just > rtx v1 = GEN_INT (...); > rtx v2 = GEN_INT (...); > machine_mode mode = TYPE_MODE (TREE_TYPE (arg)); > rtx ret = gen_reg_rtx (TYPE_MODE (integer_type_node)); > emit_move_insn (ret, const0_rtx); > rtx_code_label *done_label = gen_label_rtx (); > emit_cmp_and_jump_insns (op, v1, NE, NULL_RTX, mode, > false, done_label, PROB_EVEN); > emit_cmp_and_jump_insns (op, v2, NE, NULL_RTX, mode, > false, done_label, PROB_EVEN); > emit_move_insn (ret, const1_rtx); > emit_label (done_label); > return ret; > or similar. Thanks for the review/suggestion/code! > Note, it would still be worthwhile to fold the builtin, at least > when optimizing, after IPA. Dunno if we have some property you can check, > and Richard B. could suggest where it would be most appropriate (if GIMPLE > guarded match.pd entry, or what), gimple_fold, etc. I'll make a note to have a look at that later on. > I bet I should handle omp_is_initial_device (); similarly. Yeah. Committed to gomp-4_0-branch in r218858: commit da5ad5aec1c0f9b230ecb2dc00620a5598de5066 Author: tschwinge Date: Thu Dec 18 10:42:30 2014 +0000 OpenACC acc_on_device: Make builtin expansion more RTXy. gcc/ * builtins.c (expand_builtin_acc_on_device): Make more RTXy. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@218858 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 5 +++++ gcc/builtins.c | 44 +++++++++++++++++++++----------------------- 2 files changed, 26 insertions(+), 23 deletions(-) Grüße, Thomas diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp index b370616..a3650c5 100644 --- gcc/ChangeLog.gomp +++ gcc/ChangeLog.gomp @@ -1,3 +1,8 @@ +2014-12-18 Thomas Schwinge + Jakub Jelinek + + * builtins.c (expand_builtin_acc_on_device): Make more RTXy. + 2014-12-17 Thomas Schwinge Bernd Schmidt diff --git gcc/builtins.c gcc/builtins.c index fcf3f53..e946521 100644 --- gcc/builtins.c +++ gcc/builtins.c @@ -5889,38 +5889,36 @@ expand_stack_save (void) acceleration device (ACCEL_COMPILER conditional). */ static rtx -expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED) +expand_builtin_acc_on_device (tree exp, rtx target) { if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE)) return NULL_RTX; - tree arg, v1, v2, ret; - location_t loc; - - arg = CALL_EXPR_ARG (exp, 0); - arg = builtin_save_expr (arg); - loc = EXPR_LOCATION (exp); - - /* Build: (arg == v1 || arg == v2) ? 1 : 0. */ + tree arg = CALL_EXPR_ARG (exp, 0); + /* Return (arg == v1 || arg == v2) ? 1 : 0. */ + machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg)); + rtx v = expand_normal (arg), v1, v2; #ifdef ACCEL_COMPILER - v1 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_NOT_HOST); - v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device); + v1 = GEN_INT (GOMP_DEVICE_NOT_HOST); + v2 = GEN_INT (ACCEL_COMPILER_acc_device); #else - v1 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_NONE); - v2 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_HOST); + v1 = GEN_INT (GOMP_DEVICE_NONE); + v2 = GEN_INT (GOMP_DEVICE_HOST); #endif + machine_mode target_mode = TYPE_MODE (integer_type_node); + if (!REG_P (target) || GET_MODE (target) != target_mode) + target = gen_reg_rtx (target_mode); + emit_move_insn (target, const0_rtx); + rtx_code_label *done_label = gen_label_rtx (); + emit_cmp_and_jump_insns (v, v1, NE, NULL_RTX, v_mode, + false, done_label, PROB_EVEN); + emit_cmp_and_jump_insns (v, v2, NE, NULL_RTX, v_mode, + false, done_label, PROB_EVEN); + emit_move_insn (target, const1_rtx); + emit_label (done_label); - v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1); - v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2); - - /* Can't use TRUTH_ORIF_EXPR, as that is not supported by - expand_expr_real*. */ - ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2); - ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, - ret, integer_one_node, integer_zero_node); - - return expand_normal (ret); + return target; }