From patchwork Fri Nov 12 02:05:34 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 70910 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id AA191B7144 for ; Fri, 12 Nov 2010 13:05:58 +1100 (EST) Received: (qmail 20071 invoked by alias); 12 Nov 2010 02:05:56 -0000 Received: (qmail 20039 invoked by uid 22791); 12 Nov 2010 02:05:52 -0000 X-SWARE-Spam-Status: No, hits=-1.8 required=5.0 tests=AWL, BAYES_00, RCVD_IN_DNSWL_NONE, TW_DF X-Spam-Check-By: sourceware.org Received: from a.mail.sonic.net (HELO a.mail.sonic.net) (64.142.16.245) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 12 Nov 2010 02:05:45 +0000 Received: from are.twiddle.net (are.twiddle.net [75.101.38.216]) by a.mail.sonic.net (8.13.8.Beta0-Sonic/8.13.7) with ESMTP id oAC25aMh020683 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Thu, 11 Nov 2010 18:05:36 -0800 Received: from are.twiddle.net (localhost [127.0.0.1]) by are.twiddle.net (8.14.4/8.14.4) with ESMTP id oAC25ZLh012980; Thu, 11 Nov 2010 18:05:35 -0800 Received: (from rth@localhost) by are.twiddle.net (8.14.4/8.14.4/Submit) id oAC25Ybo012977; Thu, 11 Nov 2010 18:05:34 -0800 Date: Thu, 11 Nov 2010 18:05:34 -0800 From: Richard Henderson To: gcc-patches@gcc.gnu.org Cc: dje.gcc@gmail.com, uweigand@de.ibm.com, trevor_smigiel@playstation.sony.com Subject: [patch 6/N][spu] convert to fma Message-ID: <20101112020534.GA12839@twiddle.net> MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org This port 
wasn't using -mfused-madd, and so it doesn't add it as a deprecated flag. I do convert the existing fma insns to use the fma opcode. There is one possible problem: the description of the fnms insn does not match what I would expect from IBM, nor does it match the dfnms insn. It's that latter point that concerns me mostly. The patch builds a cross-compiler, but is otherwise untested. r~ diff --git a/gcc/config/spu/spu-builtins.def b/gcc/config/spu/spu-builtins.def index 9e92781..0687707 100644 --- a/gcc/config/spu/spu-builtins.def +++ b/gcc/config/spu/spu-builtins.def @@ -171,13 +171,13 @@ DEF_BUILTIN (SI_FS, CODE_FOR_subv4sf3, "si_fs", B_INSN, DEF_BUILTIN (SI_DFS, CODE_FOR_subv2df3, "si_dfs", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_FM, CODE_FOR_mulv4sf3, "si_fm", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_DFM, CODE_FOR_mulv2df3, "si_dfm", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) -DEF_BUILTIN (SI_FMA, CODE_FOR_fma_v4sf, "si_fma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) -DEF_BUILTIN (SI_DFMA, CODE_FOR_fma_v2df, "si_dfma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) -DEF_BUILTIN (SI_DFNMA, CODE_FOR_fnma_v2df, "si_dfnma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) -DEF_BUILTIN (SI_FNMS, CODE_FOR_fnms_v4sf, "si_fnms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) -DEF_BUILTIN (SI_DFNMS, CODE_FOR_fnms_v2df, "si_dfnms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) -DEF_BUILTIN (SI_FMS, CODE_FOR_fms_v4sf, "si_fms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) -DEF_BUILTIN (SI_DFMS, CODE_FOR_fms_v2df, "si_dfms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FMA, 
CODE_FOR_fmav4sf4, "si_fma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFMA, CODE_FOR_fmav2df4, "si_dfma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFNMA, CODE_FOR_nfmav2df4, "si_dfnma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FNMS, CODE_FOR_fnmav4sf4, "si_fnms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFNMS, CODE_FOR_nfmsv2df4, "si_dfnms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FMS, CODE_FOR_fmsv4sf4, "si_fms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFMS, CODE_FOR_fmsv2df4, "si_dfms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_FREST, CODE_FOR_frest_v4sf, "si_frest", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_FRSQEST, CODE_FOR_frsqest_v4sf, "si_frsqest", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) DEF_BUILTIN (SI_FI, CODE_FOR_fi_v4sf, "si_fi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) @@ -242,7 +242,7 @@ DEF_BUILTIN (SPU_MULH, CODE_FOR_spu_mpyh, "spu_mulh", B_INSN, DEF_BUILTIN (SPU_MULSR, CODE_FOR_spu_mpys, "spu_mulsr", B_INSN, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI)) DEF_BUILTIN (SPU_FREST, CODE_FOR_frest_v4sf, "spu_frest", B_INSN, _A2(SPU_BTI_V4SF, SPU_BTI_V4SF)) DEF_BUILTIN (SPU_FRSQEST, CODE_FOR_frsqest_v4sf, "spu_frsqest", B_INSN, _A2(SPU_BTI_V4SF, SPU_BTI_V4SF)) -DEF_BUILTIN (SPU_NMADD, CODE_FOR_fnma_v2df, "spu_nmadd", B_INSN, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_NMADD, CODE_FOR_nfmav2df4, "spu_nmadd", B_INSN, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) DEF_BUILTIN (SPU_ABSD, CODE_FOR_spu_absdb, "spu_absd", B_INSN, 
_A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) DEF_BUILTIN (SPU_AVG, CODE_FOR_spu_avgb, "spu_avg", B_INSN, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) DEF_BUILTIN (SPU_SUMB, CODE_FOR_spu_sumb, "spu_sumb", B_INSN, _A3(SPU_BTI_UV8HI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) @@ -286,11 +286,11 @@ DEF_BUILTIN (SPU_GENCX_0, CODE_FOR_cgx_v4si, "spu_gencx_0", DEF_BUILTIN (SPU_GENCX_1, CODE_FOR_cgx_v4si, "spu_gencx_1", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) DEF_BUILTIN (SPU_MADD, CODE_FOR_nothing, "spu_madd", B_OVERLOAD, _A1(SPU_BTI_VOID)) DEF_BUILTIN (SPU_MADD_0, CODE_FOR_spu_mpya, "spu_madd_0", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V4SI)) -DEF_BUILTIN (SPU_MADD_1, CODE_FOR_fma_v4sf, "spu_madd_1", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) -DEF_BUILTIN (SPU_MADD_2, CODE_FOR_fma_v2df, "spu_madd_2", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_MADD_1, CODE_FOR_fmav4sf4, "spu_madd_1", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_MADD_2, CODE_FOR_fmav2df4, "spu_madd_2", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) DEF_BUILTIN (SPU_MSUB, CODE_FOR_nothing, "spu_msub", B_OVERLOAD, _A1(SPU_BTI_VOID)) -DEF_BUILTIN (SPU_MSUB_0, CODE_FOR_fms_v4sf, "spu_msub_0", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) -DEF_BUILTIN (SPU_MSUB_1, CODE_FOR_fms_v2df, "spu_msub_1", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_MSUB_0, CODE_FOR_fmsv4sf4, "spu_msub_0", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_MSUB_1, CODE_FOR_fmsv2df4, "spu_msub_1", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) DEF_BUILTIN (SPU_MHHADD, CODE_FOR_nothing, "spu_mhhadd", B_OVERLOAD, _A1(SPU_BTI_VOID)) DEF_BUILTIN (SPU_MHHADD_0, CODE_FOR_spu_mpyhhau, 
"spu_mhhadd_0", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV4SI)) DEF_BUILTIN (SPU_MHHADD_1, CODE_FOR_spu_mpyhha, "spu_mhhadd_1", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V4SI)) @@ -306,8 +306,8 @@ DEF_BUILTIN (SPU_MULO_1, CODE_FOR_spu_mpyu, "spu_mulo_1", DEF_BUILTIN (SPU_MULO_2, CODE_FOR_spu_mpy, "spu_mulo_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_INTHI)) DEF_BUILTIN (SPU_MULO_3, CODE_FOR_spu_mpyu, "spu_mulo_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV8HI, SPU_BTI_UINTHI)) DEF_BUILTIN (SPU_NMSUB, CODE_FOR_nothing, "spu_nmsub", B_OVERLOAD, _A1(SPU_BTI_VOID)) -DEF_BUILTIN (SPU_NMSUB_0, CODE_FOR_fnms_v4sf, "spu_nmsub_0", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) -DEF_BUILTIN (SPU_NMSUB_1, CODE_FOR_fnms_v2df, "spu_nmsub_1", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_NMSUB_0, CODE_FOR_fnmav4sf4, "spu_nmsub_0", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_NMSUB_1, CODE_FOR_nfmsv2df4, "spu_nmsub_1", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) DEF_BUILTIN (SPU_SUB, CODE_FOR_nothing, "spu_sub", B_OVERLOAD, _A1(SPU_BTI_VOID)) DEF_BUILTIN (SPU_SUB_0, CODE_FOR_subv8hi3, "spu_sub_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) DEF_BUILTIN (SPU_SUB_1, CODE_FOR_subv8hi3, "spu_sub_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) @@ -771,8 +771,8 @@ DEF_BUILTIN (SPU_PROMOTE_9, CODE_FOR_spu_promote, "spu_promote_9", /* These are for the convenience of implementing fma() in the standard libraries. 
*/ -DEF_BUILTIN (SCALAR_FMA, CODE_FOR_fma_sf, "fmas", B_INSN, _A4(SPU_BTI_FLOAT, SPU_BTI_FLOAT, SPU_BTI_FLOAT, SPU_BTI_FLOAT)) -DEF_BUILTIN (SCALAR_DFMA, CODE_FOR_fma_df, "dfmas", B_INSN, _A4(SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE)) +DEF_BUILTIN (SCALAR_FMA, CODE_FOR_fmasf4, "fmas", B_INSN, _A4(SPU_BTI_FLOAT, SPU_BTI_FLOAT, SPU_BTI_FLOAT, SPU_BTI_FLOAT)) +DEF_BUILTIN (SCALAR_DFMA, CODE_FOR_fmadf4, "dfmas", B_INSN, _A4(SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE)) DEF_BUILTIN (SPU_ALIGN_HINT, CODE_FOR_spu_align_hint,"spu_align_hint", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_7, SPU_BTI_7)) #undef _A1 diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md index 181d0db..528a07c 100644 --- a/gcc/config/spu/spu.md +++ b/gcc/config/spu/spu.md @@ -748,7 +748,7 @@ emit_move_insn (operands[4], CONST_DOUBLE_FROM_REAL_VALUE (scale, SFmode)); - emit_insn (gen_fma_sf (operands[0], + emit_insn (gen_fmasf4 (operands[0], operands[2], operands[4], operands[3])); DONE; }) @@ -1533,69 +1533,98 @@ "fm\t%0,%1,%2" [(set_attr "type" "fp")]) -(define_insn "fma_<mode>" +(define_insn "fma<mode>4" [(set (match_operand:VSF 0 "spu_reg_operand" "=r") - (plus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") - (match_operand:VSF 2 "spu_reg_operand" "r")) - (match_operand:VSF 3 "spu_reg_operand" "r")))] + (fma:VSF (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r") + (match_operand:VSF 3 "spu_reg_operand" "r")))] "" "fma\t%0,%1,%2,%3" [(set_attr "type" "fp6")]) -(define_insn "fnms_<mode>" +;; ??? The official description is (c - a*b), which is exactly (-a*b + c). +;; Note that this doesn't match the dfnms description. Incorrect?
+(define_insn "fnma<mode>4" [(set (match_operand:VSF 0 "spu_reg_operand" "=r") - (minus:VSF (match_operand:VSF 3 "spu_reg_operand" "r") - (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") - (match_operand:VSF 2 "spu_reg_operand" "r"))))] + (fma:VSF + (neg:VSF (match_operand:VSF 1 "spu_reg_operand" "r")) + (match_operand:VSF 2 "spu_reg_operand" "r") + (match_operand:VSF 3 "spu_reg_operand" "r")))] "" "fnms\t%0,%1,%2,%3" [(set_attr "type" "fp6")]) -(define_insn "fms_<mode>" +(define_insn "fms<mode>4" [(set (match_operand:VSF 0 "spu_reg_operand" "=r") - (minus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") - (match_operand:VSF 2 "spu_reg_operand" "r")) - (match_operand:VSF 3 "spu_reg_operand" "r")))] + (fma:VSF + (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r") + (neg:VSF (match_operand:VSF 3 "spu_reg_operand" "r"))))] "" "fms\t%0,%1,%2,%3" [(set_attr "type" "fp6")]) -(define_insn "fma_<mode>" +(define_insn "fma<mode>4" [(set (match_operand:VDF 0 "spu_reg_operand" "=r") - (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") - (match_operand:VDF 2 "spu_reg_operand" "r")) - (match_operand:VDF 3 "spu_reg_operand" "0")))] + (fma:VDF (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r") + (match_operand:VDF 3 "spu_reg_operand" "0")))] "" "dfma\t%0,%1,%2" [(set_attr "type" "fpd")]) -(define_insn "fnma_<mode>" +(define_insn "fms<mode>4" [(set (match_operand:VDF 0 "spu_reg_operand" "=r") - (neg:VDF (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") - (match_operand:VDF 2 "spu_reg_operand" "r")) - (match_operand:VDF 3 "spu_reg_operand" "0"))))] + (fma:VDF + (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r") + (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "0"))))] + "" + "dfms\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_insn "nfma<mode>4" + [(set (match_operand:VDF 0 "spu_reg_operand" "=r") + (neg:VDF + (fma:VDF (match_operand:VDF 1 "spu_reg_operand" "r") +
(match_operand:VDF 2 "spu_reg_operand" "r") + (match_operand:VDF 3 "spu_reg_operand" "0"))))] "" "dfnma\t%0,%1,%2" [(set_attr "type" "fpd")]) -(define_insn "fnms_<mode>" +(define_insn "nfms<mode>4" [(set (match_operand:VDF 0 "spu_reg_operand" "=r") - (minus:VDF (match_operand:VDF 3 "spu_reg_operand" "0") - (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") - (match_operand:VDF 2 "spu_reg_operand" "r"))))] + (neg:VDF + (fma:VDF + (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r") + (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "0")))))] "" "dfnms\t%0,%1,%2" [(set_attr "type" "fpd")]) -(define_insn "fms_<mode>" - [(set (match_operand:VDF 0 "spu_reg_operand" "=r") - (minus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") - (match_operand:VDF 2 "spu_reg_operand" "r")) - (match_operand:VDF 3 "spu_reg_operand" "0")))] - "" - "dfms\t%0,%1,%2" - [(set_attr "type" "fpd")]) +;; If signed zeros are ignored, -(a * b - c) = -a * b + c. +(define_expand "fnma<mode>4" + [(set (match_operand:VDF 0 "spu_reg_operand" "") + (neg:VDF + (fma:VDF + (match_operand:VDF 1 "spu_reg_operand" "") + (match_operand:VDF 2 "spu_reg_operand" "") + (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "")))))] + "!HONOR_SIGNED_ZEROS (<MODE>mode)" + "") +;; If signed zeros are ignored, -(a * b + c) = -a * b - c. +(define_expand "fnms<mode>4" + [(set (match_operand:VDF 0 "register_operand" "") + (neg:VDF + (fma:VDF + (match_operand:VDF 1 "register_operand" "") + (match_operand:VDF 2 "register_operand" "") + (match_operand:VDF 3 "register_operand" ""))))] + "!HONOR_SIGNED_ZEROS (<MODE>mode)" + "") ;; mul highpart, used for divide by constant optimizations.
@@ -1845,8 +1874,8 @@ emit_insn (gen_frest_<mode>(operands[3], operands[2])); emit_insn (gen_fi_<mode>(operands[3], operands[2], operands[3])); emit_insn (gen_mul<mode>3(operands[4], operands[1], operands[3])); - emit_insn (gen_fnms_<mode>(operands[0], operands[4], operands[2], operands[1])); - emit_insn (gen_fma_<mode>(operands[0], operands[0], operands[3], operands[4])); + emit_insn (gen_fnma<mode>4(operands[0], operands[4], operands[2], operands[1])); + emit_insn (gen_fma<mode>4(operands[0], operands[0], operands[3], operands[4])); DONE; }) @@ -1870,8 +1899,8 @@ emit_insn (gen_frest_<mode> (operands[3], operands[2])); emit_insn (gen_fi_<mode> (operands[3], operands[2], operands[3])); emit_insn (gen_mul<mode>3 (operands[4], operands[1], operands[3])); - emit_insn (gen_fnms_<mode> (operands[5], operands[4], operands[2], operands[1])); - emit_insn (gen_fma_<mode> (operands[3], operands[5], operands[3], operands[4])); + emit_insn (gen_fnma<mode>4 (operands[5], operands[4], operands[2], operands[1])); + emit_insn (gen_fma<mode>4 (operands[3], operands[5], operands[3], operands[4])); /* Due to truncation error, the quotient result may be low by 1 ulp. Conditionally add one if the estimate is too small in magnitude.
*/ @@ -1885,7 +1914,7 @@ emit_insn (gen_add<f2i>3 (gen_lowpart (<F2I>mode, operands[4]), gen_lowpart (<F2I>mode, operands[3]), spu_const (<F2I>mode, 1))); - emit_insn (gen_fnms_<mode> (operands[0], operands[2], operands[4], operands[1])); + emit_insn (gen_fnma<mode>4 (operands[0], operands[2], operands[4], operands[1])); emit_insn (gen_mul<mode>3 (operands[0], operands[0], operands[5])); emit_insn (gen_cgt_<f2i> (gen_lowpart (<F2I>mode, operands[0]), gen_lowpart (<F2I>mode, operands[0]), @@ -1920,8 +1949,8 @@ emit_insn (gen_fi_sf(operands[2],operands[1],operands[2])); emit_insn (gen_mulsf3(operands[5],operands[2],operands[1])); emit_insn (gen_mulsf3(operands[3],operands[5],operands[3])); - emit_insn (gen_fnms_sf(operands[4],operands[2],operands[5],operands[4])); - emit_insn (gen_fma_sf(operands[0],operands[4],operands[3],operands[5])); + emit_insn (gen_fnmasf4(operands[4],operands[2],operands[5],operands[4])); + emit_insn (gen_fmasf4(operands[0],operands[4],operands[3],operands[5])); DONE; })