@@ -311,6 +311,7 @@ i[34567]86-*-*)
cpu_type=i386
c_target_objs="i386-c.o"
cxx_target_objs="i386-c.o"
+ extra_options="${extra_options} fused-madd.opt"
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
@@ -322,6 +323,7 @@ x86_64-*-*)
cpu_type=i386
c_target_objs="i386-c.o"
cxx_target_objs="i386-c.o"
+ extra_options="${extra_options} fused-madd.opt"
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
@@ -34394,8 +34394,7 @@ ix86_autovectorize_vector_sizes (void)
#define TARGET_DEFAULT_TARGET_FLAGS \
(TARGET_DEFAULT \
| TARGET_SUBTARGET_DEFAULT \
- | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
- | MASK_FUSED_MADD)
+ | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option
@@ -261,12 +261,6 @@ Target Report Mask(VZEROUPPER) Save
Generate vzeroupper instruction before a transfer of control flow out of
the function.
-mfused-madd
-Target Report Mask(FUSED_MADD) Save
-Enable automatic generation of fused floating point multiply-add instructions
-if the ISA supports such instructions. The -mfused-madd option is on by
-default.
-
mdispatch-scheduler
Target RejectNegative Var(flag_dispatch_scheduler)
Do dispatch scheduling if processor is bdver1 and Haifa scheduling
@@ -1856,6 +1856,10 @@
;; (set (reg1) (mem (addr1)))
;; (set (reg2) (mult (reg1) (mem (addr2))))
;; (set (reg3) (plus (reg2) (mem (addr3))))
+;;
+;; ??? This is historic, pre-dating the gimple fma transformation.
+;; We could now properly represent that only one memory operand is
+;; allowed and not be penalized during optimization.
;; Intrinsic FMA operations.
@@ -2180,100 +2184,6 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; Non-intrinsic versions, matched when fused-multiply-add is allowed.
-;;
-;; ??? If fused-madd were a generic flag, combine could do this without
-;; needing splitters here in the backend. Irritatingly, combine won't
-;; recognize many of these with mere splits, since only 3 or more insns
-;; are allowed to split during combine. Thankfully, there's always a
-;; split_all_insns pass that runs before reload.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn_and_split "*split_fma"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (plus:FMAMODE
- (mult:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand")
- (match_operand:FMAMODE 2 "nonimmediate_operand"))
- (match_operand:FMAMODE 3 "nonimmediate_operand")))]
- "TARGET_SSE_MATH && TARGET_FUSED_MADD
- && (TARGET_FMA || TARGET_FMA4)
- && !(reload_in_progress || reload_completed)"
- { gcc_unreachable (); }
- "&& 1"
- [(set (match_dup 0)
- (fma:FMAMODE
- (match_dup 1)
- (match_dup 2)
- (match_dup 3)))]
- "")
-
-;; Floating multiply and subtract.
-(define_insn_and_split "*split_fms"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (minus:FMAMODE
- (mult:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand")
- (match_operand:FMAMODE 2 "nonimmediate_operand"))
- (match_operand:FMAMODE 3 "nonimmediate_operand")))]
- "TARGET_SSE_MATH && TARGET_FUSED_MADD
- && (TARGET_FMA || TARGET_FMA4)
- && !(reload_in_progress || reload_completed)"
- { gcc_unreachable (); }
- "&& 1"
- [(set (match_dup 0)
- (fma:FMAMODE
- (match_dup 1)
- (match_dup 2)
- (neg:FMAMODE (match_dup 3))))]
- "")
-
-;; Floating point negative multiply and add.
-;; Recognize (-a * b + c) via the canonical form: c - (a * b).
-(define_insn_and_split "*split_fnma"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (minus:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand")
- (mult:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand")
- (match_operand:FMAMODE 2 "nonimmediate_operand"))))]
- "TARGET_SSE_MATH && TARGET_FUSED_MADD
- && (TARGET_FMA || TARGET_FMA4)
- && !(reload_in_progress || reload_completed)"
- { gcc_unreachable (); }
- "&& 1"
- [(set (match_dup 0)
- (fma:FMAMODE
- (neg:FMAMODE (match_dup 1))
- (match_dup 2)
- (match_dup 3)))]
- "")
-
-;; Floating point negative multiply and subtract.
-;; Recognize (-a * b - c) via the canonical form: c - (-a * b).
-(define_insn_and_split "*split_fnms"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (minus:FMAMODE
- (mult:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand"))
- (match_operand:FMAMODE 2 "nonimmediate_operand"))
- (match_operand:FMAMODE 3 "nonimmediate_operand")))]
- "TARGET_SSE_MATH && TARGET_FUSED_MADD
- && (TARGET_FMA || TARGET_FMA4)
- && !(reload_in_progress || reload_completed)"
- { gcc_unreachable (); }
- "&& 1"
- [(set (match_dup 0)
- (fma:FMAMODE
- (neg:FMAMODE (match_dup 1))
- (match_dup 2)
- (neg:FMAMODE (match_dup 3))))]
- "")
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1,5 +1,5 @@
/* PR target/44338 */
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -mno-fused-madd" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -ffp-contract=off" } */
#include "sse-23.c"
This patch continues the elimination of -mfused-madd in favor of -ffp-contract, converting the i386 backend. This target was already mostly tidied up; only the -mfused-madd splitters remained to be eliminated. I won't check this in until its prerequisites are approved. r~ From 40c1cbaf9c329bfcad531ce2ec34cd87ebaba7c9 Mon Sep 17 00:00:00 2001 From: Richard Henderson <rth@twiddle.net> Date: Wed, 10 Nov 2010 13:47:26 -0800 Subject: [PATCH 3/3] i386: move -mfused-madd to -ffp-contract. Delete the TARGET_FUSED_MADD splitters. --- gcc/config.gcc | 2 + gcc/config/i386/i386.c | 3 +- gcc/config/i386/i386.opt | 6 -- gcc/config/i386/sse.md | 98 +------------------------------ gcc/testsuite/gcc.target/i386/sse-24.c | 2 +- 5 files changed, 8 insertions(+), 103 deletions(-)