[powerpc] Improve integer to floating point conversions on powerpc

Message ID	20100824145857.GA6425@hungry-tiger.westford.ibm.com
State	New
Headers	show Return-Path: <gcc-patches-return-271252-incoming=patchwork.ozlabs.org@gcc.gnu.org> sender: gnu@the-meissners.org) by homiemail-a28.g.dreamhost.com (Postfix) with ESMTPSA id 85C3F1B4097; Tue, 24 Aug 2010 07:58:59 -0700 (PDT) Date: Tue, 24 Aug 2010 10:58:57 -0400 From: Michael Meissner <meissner@linux.vnet.ibm.com> To: gcc-patches@gcc.gnu.org, dje.gcc@gmail.com Subject: [PATCH, powerpc] Improve integer to floating point conversions on powerpc Message-ID: <20100824145857.GA6425@hungry-tiger.westford.ibm.com> Mail-Followup-To: Michael Meissner <meissner@linux.vnet.ibm.com>, gcc-patches@gcc.gnu.org, dje.gcc@gmail.com MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="gBBFr7Ir9EOA20Yy" Content-Disposition: inline User-Agent: Mutt/1.5.20 (2009-08-17) Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org

Index: gcc/config/rs6000/rs6000-protos.h =================================================================== --- gcc/config/rs6000/rs6000-protos.h (.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk) (revision 163471) +++ gcc/config/rs6000/rs6000-protos.h (working copy) @@ -129,6 +129,9 @@ extern void rs6000_emit_parity (rtx, rtx extern rtx rs6000_machopic_legitimize_pic_address (rtx, enum machine_mode, rtx); +extern rtx rs6000_address_for_fpconvert (rtx); +extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool); +extern void rs6000_expand_convert_si_to_sfdf (rtx, rtx, bool); #endif /* RTX_CODE */ #ifdef TREE_CODE Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk) (revision 163471) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -2501,10 +2501,10 @@ rs6000_override_options (const char *def POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_MFPGPR | MASK_RECIP_PRECISION}, - {"power7", PROCESSOR_POWER7, + {"power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - | MASK_VSX| MASK_RECIP_PRECISION}, /* Don't add MASK_ISEL by default */ + | MASK_VSX | MASK_RECIP_PRECISION}, {"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK}, {"powerpc64", PROCESSOR_POWERPC64, POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64}, @@ -2541,15 +2541,19 @@ rs6000_override_options (const char *def ISA_2_1_MASKS = MASK_MFCRF, ISA_2_2_MASKS = (ISA_2_1_MASKS | MASK_POPCNTB | MASK_FPRND), - /* For ISA 2.05, do not add MFPGPR, since it isn't in ISA 2.06, and - don't add ALTIVEC, since in general it isn't a win on power6. 
*/ - ISA_2_5_MASKS = (ISA_2_2_MASKS | MASK_CMPB | MASK_RECIP_PRECISION - | MASK_DFP), + /* For ISA 2.05, do not add MFPGPR, since it isn't in ISA 2.06, and don't + add ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, + fsel, fre, fsqrt, etc. were no longer documented as optional. Group + masks by server and embedded. */ + ISA_2_5_MASKS_EMBEDDED = (ISA_2_2_MASKS | MASK_CMPB | MASK_RECIP_PRECISION + | MASK_PPC_GFXOPT | MASK_PPC_GPOPT), + ISA_2_5_MASKS_SERVER = (ISA_2_5_MASKS_EMBEDDED | MASK_DFP), /* For ISA 2.06, don't add ISEL, since in general it isn't a win, but altivec is a win so enable it. */ - ISA_2_6_MASKS = (ISA_2_5_MASKS | MASK_ALTIVEC | MASK_POPCNTD - | MASK_VSX | MASK_RECIP_PRECISION) + ISA_2_6_MASKS_EMBEDDED = (ISA_2_5_MASKS_EMBEDDED | MASK_POPCNTD), + ISA_2_6_MASKS_SERVER = (ISA_2_5_MASKS_SERVER | MASK_POPCNTD | MASK_ALTIVEC + | MASK_VSX) }; /* Numerous experiment shows that IRA based loop pressure @@ -2690,15 +2694,22 @@ rs6000_override_options (const char *def { warning (0, msg); target_flags &= ~ MASK_VSX; + target_flags_explicit |= MASK_VSX; } } /* For the newer switches (vsx, dfp, etc.) set some of the older options, unless the user explicitly used the -mno-<option> to disable the code. 
*/ if (TARGET_VSX) - target_flags |= (ISA_2_6_MASKS & ~target_flags_explicit); + target_flags |= (ISA_2_6_MASKS_SERVER & ~target_flags_explicit); + else if (TARGET_POPCNTD) + target_flags |= (ISA_2_6_MASKS_EMBEDDED & ~target_flags_explicit); else if (TARGET_DFP) - target_flags |= (ISA_2_5_MASKS & ~target_flags_explicit); + target_flags |= (ISA_2_5_MASKS_SERVER & ~target_flags_explicit); + else if (TARGET_CMPB) + target_flags |= (ISA_2_5_MASKS_EMBEDDED & ~target_flags_explicit); + else if (TARGET_POPCNTB || TARGET_FPRND) + target_flags |= (ISA_2_2_MASKS & ~target_flags_explicit); else if (TARGET_ALTIVEC) target_flags |= (MASK_PPC_GFXOPT & ~target_flags_explicit); @@ -26941,4 +26952,122 @@ rs6000_final_prescan_insn (rtx insn, rtx } } + +/* Allocate a stack temp and fixup the address so it meets the particular + memory requirements (either offsettable or REG+REG addressing). */ + +rtx +rs6000_allocate_stack_temp (enum machine_mode mode, + bool offsettable_p, + bool reg_reg_p) +{ + rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); + rtx addr = XEXP (stack, 0); + int strict_p = (reload_in_progress || reload_completed); + + if (!legitimate_indirect_address_p (addr, strict_p)) + { + if (offsettable_p + && !rs6000_legitimate_offset_address_p (mode, addr, strict_p)) + stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); + + else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p)) + stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); + } + + return stack; +} + +/* Given a memory reference, if it is not a reg or reg+reg addressing, convert + to such a form to deal with memory reference instructions like STFIWX that + only take reg+reg addressing. */ + +rtx +rs6000_address_for_fpconvert (rtx x) +{ + int strict_p = (reload_in_progress || reload_completed); + rtx addr; + + gcc_assert (MEM_P (x)); + addr = XEXP (x, 0); + if (! legitimate_indirect_address_p (addr, strict_p) + && ! 
legitimate_indexed_address_p (addr, strict_p)) + x = replace_equiv_address (x, copy_addr_to_reg (addr)); + + return x; +} + +/* Expand 32-bit int -> floating point conversions. The insns are emitted + directly; no value is returned. */ + +void +rs6000_expand_convert_si_to_sfdf (rtx dest, rtx src, bool unsigned_p) +{ + enum machine_mode dmode = GET_MODE (dest); + rtx (*func_si) (rtx, rtx, rtx, rtx); + rtx (*func_si_mem) (rtx, rtx); + rtx (*func_di) (rtx, rtx); + rtx reg, stack; + + gcc_assert (GET_MODE (src) == SImode); + + if (dmode == SFmode) + { + if (unsigned_p) + { + gcc_assert (TARGET_FCFIDUS && TARGET_LFIWZX); + func_si = gen_floatunssisf2_lfiwzx; + func_si_mem = gen_floatunssisf2_lfiwzx_mem; + func_di = gen_floatunsdisf2; + } + else + { + gcc_assert (TARGET_FCFIDS && TARGET_LFIWAX); + func_si = gen_floatsisf2_lfiwax; + func_si_mem = gen_floatsisf2_lfiwax_mem; + func_di = gen_floatdisf2; + } + } + + else if (dmode == DFmode) + { + if (unsigned_p) + { + gcc_assert (TARGET_FCFIDU && TARGET_LFIWZX); + func_si = gen_floatunssidf2_lfiwzx; + func_si_mem = gen_floatunssidf2_lfiwzx_mem; + func_di = gen_floatunsdidf2; + } + else + { + gcc_assert (TARGET_FCFID && TARGET_LFIWAX); + func_si = gen_floatsidf2_lfiwax; + func_si_mem = gen_floatsidf2_lfiwax_mem; + func_di = gen_floatdidf2; + } + } + + else + gcc_unreachable (); + + if (MEM_P (src)) + { + src = rs6000_address_for_fpconvert (src); + emit_insn (func_si_mem (dest, src)); + } + else if (!TARGET_MFPGPR) + { + reg = gen_reg_rtx (DImode); + stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_insn (func_si (dest, src, stack, reg)); + } + else + { + if (!REG_P (src)) + src = force_reg (SImode, src); + reg = convert_to_mode (DImode, src, unsigned_p); + emit_insn (func_di (dest, reg)); + } +} + #include "gt-rs6000.h" Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk) (revision 163471) +++ 
gcc/config/rs6000/rs6000.md (working copy) @@ -105,6 +105,11 @@ (define_constants (UNSPEC_BPERM 51) (UNSPEC_COPYSIGN 52) (UNSPEC_PARITY 53) + (UNSPEC_FCTIW 54) + (UNSPEC_FCTID 55) + (UNSPEC_LFIWAX 56) + (UNSPEC_LFIWZX 57) + (UNSPEC_FCTIWUZ 58) ]) ;; @@ -252,13 +257,19 @@ (define_mode_attr mptrsize [(SI "si") (DI "di")]) (define_mode_attr rreg [(SF "f") - (DF "Ws") - (V4SF "Wf") - (V2DF "Wd")]) + (DF "ws") + (V4SF "wf") + (V2DF "wd")]) (define_mode_attr rreg2 [(SF "f") (DF "d")]) +(define_mode_attr SI_CONVERT_FP [(SF "TARGET_FCFIDS") + (DF "TARGET_FCFID")]) + +(define_mode_attr E500_CONVERT [(SF "!TARGET_FPRS") + (DF "TARGET_E500_DOUBLE")]) + (define_mode_attr TARGET_FLOAT [(SF "TARGET_SINGLE_FLOAT") (DF "TARGET_DOUBLE_FLOAT")]) @@ -6448,6 +6459,150 @@ (define_expand "fixuns_truncdfdi2" "TARGET_HARD_FLOAT && TARGET_VSX" "") +; We don't define lfiwax/lfiwzx with the normal definition, because we +; don't want to support putting SImode in FPR registers. +(define_insn "lfiwax" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d") + (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + UNSPEC_LFIWAX))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX" + "lfiwax %0,%y1" + [(set_attr "type" "fpload")]) + +(define_insn_and_split "floatsi<mode>2_lfiwax" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>,<rreg2>") + (float:SFDF (match_operand:SI 1 "nonimmediate_operand" "Z,r"))) + (clobber (match_operand:SI 2 "indexed_or_indirect_operand" "=Z,Z")) + (clobber (match_operand:DI 3 "gpc_reg_operand" "=d,d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX + && <SI_CONVERT_FP>" + "#" + "MEM_P (operands[1]) || reload_completed" + [(pc)] + " +{ + if (MEM_P (operands[1])) + { + operands[1] = rs6000_address_for_fpconvert (operands[1]); + emit_insn (gen_lfiwax (operands[3], operands[1])); + } + else + { + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_lfiwax (operands[3], operands[2])); + } + 
emit_insn (gen_floatdi<mode>2 (operands[0], operands[3])); + DONE; +}" + [(set_attr "length" "8,12")]) + +(define_insn_and_split "floatsi<mode>2_lfiwax_mem" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>") + (float:SFDF (match_operand:SI 1 "memory_operand" "Z"))) + (clobber (match_scratch:DI 2 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX + && <SI_CONVERT_FP>" + "#" + "&& reload_completed" + [(pc)] + " +{ + emit_insn (gen_lfiwax (operands[2], operands[1])); + emit_insn (gen_floatdi<mode>2 (operands[0], operands[2])); + DONE; +}" + [(set_attr "length" "8")]) + +(define_insn_and_split "floatsi<mode>2_lfiwax_mem2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>") + (float:SFDF + (sign_extend:DI + (match_operand:SI 1 "memory_operand" "Z")))) + (clobber (match_scratch:DI 2 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX + && <SI_CONVERT_FP>" + "#" + "&& reload_completed" + [(pc)] + " +{ + emit_insn (gen_lfiwax (operands[2], operands[1])); + emit_insn (gen_floatdi<mode>2 (operands[0], operands[2])); + DONE; +}" + [(set_attr "length" "8")]) + +(define_insn "lfiwzx" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d") + (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + UNSPEC_LFIWZX))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX" + "lfiwzx %0,%y1" + [(set_attr "type" "fpload")]) + +(define_insn_and_split "floatunssi<mode>2_lfiwzx" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>,<rreg2>") + (unsigned_float:SFDF (match_operand:SI 1 "gpc_reg_operand" "Z,r"))) + (clobber (match_operand:SI 2 "indexed_or_indirect_operand" "=Z,Z")) + (clobber (match_operand:DI 3 "gpc_reg_operand" "=d,d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX + && <SI_CONVERT_FP>" + "#" + "MEM_P (operands[1]) || reload_completed" + [(pc)] + " +{ + if (MEM_P (operands[1])) + { + operands[1] = rs6000_address_for_fpconvert 
(operands[1]); + emit_insn (gen_lfiwzx (operands[3], operands[1])); + } + else + { + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_lfiwzx (operands[3], operands[2])); + } + emit_insn (gen_floatdi<mode>2 (operands[0], operands[3])); + DONE; +}" + [(set_attr "length" "8,12")]) + +(define_insn_and_split "floatunssi<mode>2_lfiwzx_mem" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>") + (unsigned_float:SFDF (match_operand:SI 1 "memory_operand" "Z"))) + (clobber (match_scratch:DI 2 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX + && <SI_CONVERT_FP>" + "#" + "&& reload_completed" + [(pc)] + " +{ + emit_insn (gen_lfiwzx (operands[2], operands[1])); + emit_insn (gen_floatdi<mode>2 (operands[0], operands[2])); + DONE; +}" + [(set_attr "length" "8")]) + +(define_insn_and_split "floatunssi<mode>2_lfiwzx_mem2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>") + (unsigned_float:SFDF + (zero_extend:DI + (match_operand:SI 1 "memory_operand" "Z")))) + (clobber (match_scratch:DI 2 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX + && <SI_CONVERT_FP>" + "#" + "&& reload_completed" + [(pc)] + " +{ + emit_insn (gen_lfiwzx (operands[2], operands[1])); + emit_insn (gen_floatdi<mode>2 (operands[0], operands[2])); + DONE; +}" + [(set_attr "length" "8")]) + ; For each of these conversions, there is a define_expand, a define_insn ; with a '#' template, and a define_split (with C code). 
The idea is ; to allow constant folding with the template of the define_insn, @@ -6455,7 +6610,7 @@ (define_expand "fixuns_truncdfdi2" (define_expand "floatsidf2" [(parallel [(set (match_operand:DF 0 "gpc_reg_operand" "") - (float:DF (match_operand:SI 1 "gpc_reg_operand" ""))) + (float:DF (match_operand:SI 1 "nonimmediate_operand" ""))) (use (match_dup 2)) (use (match_dup 3)) (clobber (match_dup 4)) @@ -6467,19 +6622,31 @@ (define_expand "floatsidf2" { if (TARGET_E500_DOUBLE) { + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); emit_insn (gen_spe_floatsidf2 (operands[0], operands[1])); DONE; } - if (TARGET_POWERPC64) + else if (TARGET_LFIWAX && TARGET_FCFID) + { + rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], false); + DONE; + } + else if (TARGET_FCFID) { - rtx x = convert_to_mode (DImode, operands[1], 0); - emit_insn (gen_floatdidf2 (operands[0], x)); + rtx dreg = operands[1]; + if (!REG_P (dreg)) + dreg = force_reg (SImode, dreg); + dreg = convert_to_mode (DImode, dreg, false); + emit_insn (gen_floatdidf2 (operands[0], dreg)); DONE; } + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); operands[2] = force_reg (SImode, GEN_INT (0x43300000)); operands[3] = force_reg (DFmode, CONST_DOUBLE_ATOF (\"4503601774854144\", DFmode)); - operands[4] = assign_stack_temp (DFmode, GET_MODE_SIZE (DFmode), 0); + operands[4] = rs6000_allocate_stack_temp (DFmode, true, false); operands[5] = gen_reg_rtx (DFmode); operands[6] = gen_reg_rtx (SImode); }") @@ -6492,7 +6659,7 @@ (define_insn_and_split "*floatsidf2_inte (clobber (match_operand:DF 4 "offsettable_mem_operand" "=o")) (clobber (match_operand:DF 5 "gpc_reg_operand" "=&d")) (clobber (match_operand:SI 6 "gpc_reg_operand" "=&r"))] - "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "! 
TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" "#" "" [(pc)] @@ -6518,37 +6685,79 @@ (define_insn_and_split "*floatsidf2_inte }" [(set_attr "length" "24")]) +;; If we don't have a direct conversion to single precision, don't enable this +;; conversion for 32-bit without fast math, because we don't have the insn to +;; generate the fixup swizzle to avoid double rounding problems. (define_expand "floatunssisf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") - (unsigned_float:SF (match_operand:SI 1 "gpc_reg_operand" "")))] - "TARGET_HARD_FLOAT && !TARGET_FPRS && TARGET_SINGLE_FLOAT" - "") + (unsigned_float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT + && (!TARGET_FPRS + || (TARGET_FPRS + && ((TARGET_FCFIDUS && TARGET_LFIWZX) + || (TARGET_DOUBLE_FLOAT && TARGET_FCFID + && (TARGET_POWERPC64 || flag_unsafe_math_optimizations)))))" + " +{ + if (!TARGET_FPRS) + { + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + } + else if (TARGET_LFIWZX && TARGET_FCFIDUS) + { + rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], true); + DONE; + } + else + { + rtx dreg = operands[1]; + if (!REG_P (dreg)) + dreg = force_reg (SImode, dreg); + dreg = convert_to_mode (DImode, dreg, true); + emit_insn (gen_floatdisf2 (operands[0], dreg)); + DONE; + } +}") (define_expand "floatunssidf2" [(parallel [(set (match_operand:DF 0 "gpc_reg_operand" "") - (unsigned_float:DF (match_operand:SI 1 "gpc_reg_operand" ""))) + (unsigned_float:DF (match_operand:SI 1 "nonimmediate_operand" ""))) (use (match_dup 2)) (use (match_dup 3)) (clobber (match_dup 4)) (clobber (match_dup 5))])] - "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)" + "TARGET_HARD_FLOAT + && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)" " { if (TARGET_E500_DOUBLE) { + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); emit_insn (gen_spe_floatunssidf2 
(operands[0], operands[1])); DONE; } - if (TARGET_POWERPC64) + else if (TARGET_LFIWZX && TARGET_FCFID) + { + rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], true); + DONE; + } + else if (TARGET_FCFID) { - rtx x = convert_to_mode (DImode, operands[1], 1); - emit_insn (gen_floatdidf2 (operands[0], x)); + rtx dreg = operands[1]; + if (!REG_P (dreg)) + dreg = force_reg (SImode, dreg); + dreg = convert_to_mode (DImode, dreg, true); + emit_insn (gen_floatdidf2 (operands[0], dreg)); DONE; } + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); operands[2] = force_reg (SImode, GEN_INT (0x43300000)); operands[3] = force_reg (DFmode, CONST_DOUBLE_ATOF (\"4503599627370496\", DFmode)); - operands[4] = assign_stack_temp (DFmode, GET_MODE_SIZE (DFmode), 0); + operands[4] = rs6000_allocate_stack_temp (DFmode, true, false); operands[5] = gen_reg_rtx (DFmode); }") @@ -6559,7 +6768,8 @@ (define_insn_and_split "*floatunssidf2_i (use (match_operand:DF 3 "gpc_reg_operand" "d")) (clobber (match_operand:DF 4 "offsettable_mem_operand" "=o")) (clobber (match_operand:DF 5 "gpc_reg_operand" "=&d"))] - "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "! TARGET_FCFIDU && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !(TARGET_FCFID && TARGET_POWERPC64)" "#" "" [(pc)] @@ -6782,76 +6992,154 @@ (define_insn "stfiwx" "stfiwx %1,%y0" [(set_attr "type" "fpstore")]) +;; If we don't have a direct conversion to single precision, don't enable this +;; conversion for 32-bit without fast math, because we don't have the insn to +;; generate the fixup swizzle to avoid double rounding problems. 
(define_expand "floatsisf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") - (float:SF (match_operand:SI 1 "gpc_reg_operand" "")))] - "TARGET_HARD_FLOAT && !TARGET_FPRS" - "") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT + && (!TARGET_FPRS + || (TARGET_FPRS + && ((TARGET_FCFIDS && TARGET_LFIWAX) + || (TARGET_DOUBLE_FLOAT && TARGET_FCFID + && (TARGET_POWERPC64 || flag_unsafe_math_optimizations)))))" + " +{ + if (!TARGET_FPRS) + { + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + } + else if (TARGET_FCFIDS && TARGET_LFIWAX) + { + rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], false); + DONE; + } + else + { + rtx dreg = operands[1]; + if (!REG_P (dreg)) + dreg = force_reg (SImode, dreg); + dreg = convert_to_mode (DImode, dreg, false); + emit_insn (gen_floatdisf2 (operands[0], dreg)); + DONE; + } +}") (define_expand "floatdidf2" [(set (match_operand:DF 0 "gpc_reg_operand" "") (float:DF (match_operand:DI 1 "gpc_reg_operand" "")))] - "(TARGET_POWERPC64 || TARGET_XILINX_FPU || VECTOR_UNIT_VSX_P (DFmode)) - && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS" + "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS" "") (define_insn "*floatdidf2_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (float:DF (match_operand:DI 1 "gpc_reg_operand" "!d#r")))] - "(TARGET_POWERPC64 || TARGET_XILINX_FPU) - && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS + (float:DF (match_operand:DI 1 "gpc_reg_operand" "d")))] + "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS && !VECTOR_UNIT_VSX_P (DFmode)" "fcfid %0,%1" [(set_attr "type" "fp")]) +; Allow the combiner to merge source memory operands to the conversion so that +; the optimizer/register allocator doesn't try to load the value too early in a +; GPR and then use store/load to move it to a FPR and suffer from a store-load +; hit. 
We will split after reload to avoid the trip through the GPRs + +(define_insn_and_split "*floatdidf2_mem" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (float:DF (match_operand:DI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=d"))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS && TARGET_FCFID" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:DF (match_dup 2)))] + "" + [(set_attr "length" "8")]) + (define_expand "floatunsdidf2" [(set (match_operand:DF 0 "gpc_reg_operand" "") - (unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "")))] - "TARGET_VSX" + (unsigned_float:DF + (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_FCFIDU || VECTOR_UNIT_VSX_P (DFmode)" "") -(define_expand "fix_truncdfdi2" - [(set (match_operand:DI 0 "gpc_reg_operand" "") - (fix:DI (match_operand:DF 1 "gpc_reg_operand" "")))] - "(TARGET_POWERPC64 || TARGET_XILINX_FPU || VECTOR_UNIT_VSX_P (DFmode)) - && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS" - "") +(define_insn "*floatunsdidf2_fcfidu" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "d")))] + "TARGET_FCFIDU && !VECTOR_UNIT_VSX_P (DFmode)" + "fcfidu %0,%1" + [(set_attr "type" "fp") + (set_attr "length" "4")]) -(define_insn "*fix_truncdfdi2_fpr" - [(set (match_operand:DI 0 "gpc_reg_operand" "=!d#r") - (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d")))] - "(TARGET_POWERPC64 || TARGET_XILINX_FPU) - && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS - && !VECTOR_UNIT_VSX_P (DFmode)" - "fctidz %0,%1" - [(set_attr "type" "fp")]) +(define_insn_and_split "*floatunsdidf2_mem" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unsigned_float:DF (match_operand:DI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=d"))] + "TARGET_FCFIDU || VECTOR_UNIT_VSX_P (DFmode)" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) 
(unsigned_float:DF (match_dup 2)))] + "" + [(set_attr "length" "8")]) (define_expand "floatdisf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") (float:SF (match_operand:DI 1 "gpc_reg_operand" "")))] - "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT " + "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && (TARGET_FCFIDS || TARGET_POWERPC64 || flag_unsafe_math_optimizations)" " { - rtx val = operands[1]; - if (!flag_unsafe_math_optimizations) + if (!TARGET_FCFIDS) { - rtx label = gen_label_rtx (); - val = gen_reg_rtx (DImode); - emit_insn (gen_floatdisf2_internal2 (val, operands[1], label)); - emit_label (label); + rtx val = operands[1]; + if (!flag_unsafe_math_optimizations) + { + rtx label = gen_label_rtx (); + val = gen_reg_rtx (DImode); + emit_insn (gen_floatdisf2_internal2 (val, operands[1], label)); + emit_label (label); + } + emit_insn (gen_floatdisf2_internal1 (operands[0], val)); + DONE; } - emit_insn (gen_floatdisf2_internal1 (operands[0], val)); - DONE; }") +(define_insn "floatdisf2_fcfids" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (float:SF (match_operand:DI 1 "gpc_reg_operand" "d")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FCFIDS" + "fcfids %0,%1" + [(set_attr "type" "fp")]) + +(define_insn_and_split "*floatdisf2_mem" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (float:SF (match_operand:DI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=f"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FCFIDS" + "#" + "&& reload_completed" + [(pc)] + " +{ + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_floatdisf2_fcfids (operands[0], operands[2])); + DONE; +}" + [(set_attr "length" "8")]) + ;; This is not IEEE compliant if rounding mode is "round to nearest". ;; If the DI->DF conversion is inexact, then it's possible to suffer ;; from double rounding. 
(define_insn_and_split "floatdisf2_internal1" [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (float:SF (match_operand:DI 1 "gpc_reg_operand" "!d#r"))) + (float:SF (match_operand:DI 1 "gpc_reg_operand" "d"))) (clobber (match_scratch:DF 2 "=d"))] - "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" + "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" "#" "&& reload_completed" [(set (match_dup 2) @@ -6891,6 +7179,38 @@ (define_expand "floatdisf2_internal2" operands[3] = gen_reg_rtx (DImode); operands[4] = gen_reg_rtx (CCUNSmode); }") + +(define_expand "floatunsdisf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "") + (unsigned_float:SF (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS" + "") + +(define_insn "floatunsdisf2_fcfidus" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (unsigned_float:SF (match_operand:DI 1 "gpc_reg_operand" "d")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS" + "fcfidus %0,%1" + [(set_attr "type" "fp")]) + +(define_insn_and_split "*floatunsdisf2_mem" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (unsigned_float:SF (match_operand:DI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=f"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS" + "#" + "&& reload_completed" + [(pc)] + " +{ + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_floatunsdisf2_fcfidus (operands[0], operands[2])); + DONE; +}" + [(set_attr "length" "8")]) ;; Define the DImode operations that can be done in a small number ;; of instructions. 
The & constraints are to prevent the register Index: gcc/config/rs6000/rs6000.h =================================================================== --- gcc/config/rs6000/rs6000.h (.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk) (revision 163471) +++ gcc/config/rs6000/rs6000.h (working copy) @@ -1,6 +1,7 @@ /* Definitions of target machine for GNU compiler, for IBM RS/6000. Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, - 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 + 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, + 2010 Free Software Foundation, Inc. Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) @@ -554,6 +555,25 @@ extern int rs6000_vector_align[]; #define TARGET_E500_DOUBLE 0 #define CHECK_E500_OPTIONS do { } while (0) +/* ISA 2.01 allowed FCFID to be done in 32-bit, previously it was 64-bit only. + Enable 32-bit fcfid's on any of the switches for newer ISA machines or + XILINX. */ +#define TARGET_FCFID (TARGET_POWERPC64 \ + || TARGET_POPCNTB /* ISA 2.02 */ \ + || TARGET_CMPB /* ISA 2.05 */ \ + || TARGET_POPCNTD /* ISA 2.06 */ \ + || TARGET_XILINX_FPU) + +#define TARGET_FCTIDZ TARGET_FCFID +#define TARGET_STFIWX TARGET_PPC_GFXOPT +#define TARGET_LFIWAX TARGET_CMPB +#define TARGET_LFIWZX TARGET_POPCNTD +#define TARGET_FCFIDS TARGET_POPCNTD +#define TARGET_FCFIDU TARGET_POPCNTD +#define TARGET_FCFIDUS TARGET_POPCNTD +#define TARGET_FCTIDUZ TARGET_POPCNTD +#define TARGET_FCTIWUZ TARGET_POPCNTD + /* E500 processors only support plain "sync", not lwsync. 
*/ #define TARGET_NO_LWSYNC TARGET_E500 Index: gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c (.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c (.../gcc/testsuite/gcc.target/powerpc) (revision 163489) @@ -0,0 +1,50 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7 -ffast-math" } */ +/* { dg-final { scan-assembler-times "lfiwax" 2 } } */ +/* { dg-final { scan-assembler-times "lfiwzx" 2 } } */ +/* { dg-final { scan-assembler-times "fcfids" 3 } } */ +/* { dg-final { scan-assembler-times "fcfidus" 1 } } */ +/* { dg-final { scan-assembler-times "xscvsxddp" 3 } } */ +/* { dg-final { scan-assembler-times "xscvuxddp" 1 } } */ + +void int_to_float (float *dest, int *src) +{ + *dest = (float) *src; +} + +void int_to_double (double *dest, int *src) +{ + *dest = (double) *src; +} + +void uint_to_float (float *dest, unsigned int *src) +{ + *dest = (float) *src; +} + +void uint_to_double (double *dest, unsigned int *src) +{ + *dest = (double) *src; +} + +void llong_to_float (float *dest, long long *src) +{ + *dest = (float) *src; +} + +void llong_to_double (double *dest, long long *src) +{ + *dest = (double) *src; +} + +void ullong_to_float (float *dest, unsigned long long *src) +{ + *dest = (float) *src; +} + +void ullong_to_double (double *dest, unsigned long long *src) +{ + *dest = (double) *src; +} Index: gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c (.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ 
gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c (.../gcc/testsuite/gcc.target/powerpc) (revision 163489) @@ -0,0 +1,51 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power6 -ffast-math" } */ +/* { dg-final { scan-assembler-times "lfiwax" 1 } } */ +/* { dg-final { scan-assembler-not "lfiwzx" } } */ +/* { dg-final { scan-assembler-times "fcfid " 10 } } */ +/* { dg-final { scan-assembler-not "fcfids" } } */ +/* { dg-final { scan-assembler-not "fcfidus" } } */ +/* { dg-final { scan-assembler-not "xscvsxddp" } } */ +/* { dg-final { scan-assembler-not "xscvuxddp" } } */ + +void int_to_float (float *dest, int *src) +{ + *dest = (float) *src; +} + +void int_to_double (double *dest, int *src) +{ + *dest = (double) *src; +} + +void uint_to_float (float *dest, unsigned int *src) +{ + *dest = (float) *src; +} + +void uint_to_double (double *dest, unsigned int *src) +{ + *dest = (double) *src; +} + +void llong_to_float (float *dest, long long *src) +{ + *dest = (float) *src; +} + +void llong_to_double (double *dest, long long *src) +{ + *dest = (double) *src; +} + +void ullong_to_float (float *dest, unsigned long long *src) +{ + *dest = (float) *src; +} + +void ullong_to_double (double *dest, unsigned long long *src) +{ + *dest = (double) *src; +} Index: gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c (.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c (.../gcc/testsuite/gcc.target/powerpc) (revision 163489) @@ -0,0 +1,51 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O2 
-mcpu=power5 -ffast-math" } */ +/* { dg-final { scan-assembler-not "lfiwax" } } */ +/* { dg-final { scan-assembler-not "lfiwzx" } } */ +/* { dg-final { scan-assembler-times "fcfid " 10 } } */ +/* { dg-final { scan-assembler-not "fcfids" } } */ +/* { dg-final { scan-assembler-not "fcfidus" } } */ +/* { dg-final { scan-assembler-not "xscvsxddp" } } */ +/* { dg-final { scan-assembler-not "xscvuxddp" } } */ + +void int_to_float (float *dest, int *src) +{ + *dest = (float) *src; +} + +void int_to_double (double *dest, int *src) +{ + *dest = (double) *src; +} + +void uint_to_float (float *dest, unsigned int *src) +{ + *dest = (float) *src; +} + +void uint_to_double (double *dest, unsigned int *src) +{ + *dest = (double) *src; +} + +void llong_to_float (float *dest, long long *src) +{ + *dest = (float) *src; +} + +void llong_to_double (double *dest, long long *src) +{ + *dest = (double) *src; +} + +void ullong_to_float (float *dest, unsigned long long *src) +{ + *dest = (float) *src; +} + +void ullong_to_double (double *dest, unsigned long long *src) +{ + *dest = (double) *src; +} Index: gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c (.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c (.../gcc/testsuite/gcc.target/powerpc) (revision 163489) @@ -0,0 +1,51 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O2 -mcpu=750 -ffast-math" } */ +/* { dg-final { scan-assembler-not "lfiwax" } } */ +/* { dg-final { scan-assembler-not "lfiwzx" } } */ +/* { dg-final { scan-assembler-not "fcfid " } } */ +/* { dg-final { scan-assembler-not "fcfids" } } */ +/* { dg-final { scan-assembler-not "fcfidus" } } */ +/* { dg-final { scan-assembler-not 
"xscvsxddp" } } */ +/* { dg-final { scan-assembler-not "xscvuxddp" } } */ + +void int_to_float (float *dest, int *src) +{ + *dest = (float) *src; +} + +void int_to_double (double *dest, int *src) +{ + *dest = (double) *src; +} + +void uint_to_float (float *dest, unsigned int *src) +{ + *dest = (float) *src; +} + +void uint_to_double (double *dest, unsigned int *src) +{ + *dest = (double) *src; +} + +void llong_to_float (float *dest, long long *src) +{ + *dest = (float) *src; +} + +void llong_to_double (double *dest, long long *src) +{ + *dest = (double) *src; +} + +void ullong_to_float (float *dest, unsigned long long *src) +{ + *dest = (float) *src; +} + +void ullong_to_double (double *dest, unsigned long long *src) +{ + *dest = (double) *src; +}

[powerpc] Improve integer to floating point conversions on powerpc

Commit Message

Comments

Patch