===================================================================
@@ -2315,6 +2315,18 @@ extern void debug_dispatch_window (int);
((FLAGS) & (IX86_CALLCVT_CDECL | IX86_CALLCVT_STDCALL \
| IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL))
+#define RECIP_MASK_DIV 0x01
+#define RECIP_MASK_SQRT 0x02
+#define RECIP_MASK_VEC_DIV 0x04
+#define RECIP_MASK_VEC_SQRT 0x08
+#define RECIP_MASK_ALL (RECIP_MASK_DIV | RECIP_MASK_SQRT \
+ | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_SQRT)
+
+#define TARGET_RECIP_DIV ((recip_mask & RECIP_MASK_DIV) != 0)
+#define TARGET_RECIP_SQRT ((recip_mask & RECIP_MASK_SQRT) != 0)
+#define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0)
+#define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0)
+
/*
Local variables:
version-control: t
===================================================================
@@ -7062,7 +7062,9 @@ (define_expand "divsf3"
"(TARGET_80387 && X87_ENABLE_ARITH (SFmode))
|| TARGET_SSE_MATH"
{
- if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+ if (TARGET_SSE_MATH
+ && TARGET_RECIP_DIV
+ && optimize_insn_for_speed_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
@@ -13438,7 +13440,9 @@ (define_expand "sqrt<mode>2"
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
if (<MODE>mode == SFmode
- && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun)
+ && TARGET_SSE_MATH
+ && TARGET_RECIP_SQRT
+ && !optimize_function_for_size_p (cfun)
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
===================================================================
@@ -779,7 +779,9 @@ (define_expand "div<mode>3"
{
ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
- if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ if (TARGET_SSE_MATH
+ && TARGET_RECIP_VEC_DIV
+ && !optimize_insn_for_size_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
@@ -857,7 +859,9 @@ (define_expand "sqrt<mode>2"
(sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
"TARGET_SSE"
{
- if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ if (TARGET_SSE_MATH
+ && TARGET_RECIP_VEC_SQRT
+ && !optimize_insn_for_size_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
===================================================================
@@ -31,6 +31,15 @@ HOST_WIDE_INT ix86_isa_flags = TARGET_64
Variable
HOST_WIDE_INT ix86_isa_flags_explicit
+TargetVariable
+int recip_mask
+
+Variable
+int recip_mask_explicit
+
+TargetSave
+int x_recip_mask_explicit
+
;; Definitions to add to the cl_target_option structure
;; -march= processor
TargetSave
@@ -373,6 +382,10 @@ mrecip
Target Report Mask(RECIP) Save
Generate reciprocals instead of divss and sqrtss.
+mrecip=
+Target Report RejectNegative Joined Var(ix86_recip_name) Save
+Control generation of reciprocal estimates.
+
mcld
Target Report Mask(CLD) Save
Generate cld instruction in the function prologue.
===================================================================
@@ -3057,6 +3057,22 @@ ix86_option_override_internal (bool main
PTA_64BIT /* flags are only used for -march switch. */ },
};
+ /* -mrecip options. */
+ static struct
+ {
+ const char *string; /* option name */
+ unsigned int mask; /* mask bits to set */
+ }
+ const recip_options[] =
+ {
+ { "all", RECIP_MASK_ALL },
+ { "none", 0 },
+ { "div", RECIP_MASK_DIV },
+ { "sqrt", RECIP_MASK_SQRT },
+ { "vec-div", RECIP_MASK_VEC_DIV },
+ { "vec-sqrt", RECIP_MASK_VEC_SQRT },
+ };
+
int const pta_size = ARRAY_SIZE (processor_alias_table);
/* Set up prefix/suffix so the error messages refer to either the command
@@ -3814,6 +3830,56 @@ ix86_option_override_internal (bool main
target_flags &= ~MASK_VZEROUPPER;
}
+ if (ix86_recip_name)
+ {
+ char *p = ASTRDUP (ix86_recip_name);
+ char *q;
+ unsigned int mask, i;
+ bool invert;
+
+ while ((q = strtok (p, ",")) != NULL)
+ {
+ p = NULL;
+ if (*q == '!')
+ {
+ invert = true;
+ q++;
+ }
+ else
+ invert = false;
+
+ if (!strcmp (q, "default"))
+ mask = RECIP_MASK_ALL;
+ else
+ {
+ for (i = 0; i < ARRAY_SIZE (recip_options); i++)
+ if (!strcmp (q, recip_options[i].string))
+ {
+ mask = recip_options[i].mask;
+ break;
+ }
+
+ if (i == ARRAY_SIZE (recip_options))
+ {
+ error ("unknown option for -mrecip=%s", q);
+ invert = false;
+ mask = 0;
+ }
+ }
+
+ recip_mask_explicit |= mask;
+ if (invert)
+ recip_mask &= ~mask;
+ else
+ recip_mask |= mask;
+ }
+ }
+
+ if (TARGET_RECIP)
+ recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
+ else if (target_flags_explicit & MASK_RECIP)
+ recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
+
/* Save the initial options in case the user does function specific
options. */
if (main_args_p)
@@ -3946,6 +4012,7 @@ ix86_function_specific_save (struct cl_t
ptr->arch_specified = ix86_arch_specified;
ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
ptr->ix86_target_flags_explicit = target_flags_explicit;
+ ptr->x_recip_mask_explicit = recip_mask_explicit;
/* The fields are char but the variables are not; make sure the
values fit in the fields. */
@@ -3973,6 +4040,7 @@ ix86_function_specific_restore (struct c
ix86_arch_specified = ptr->arch_specified;
ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
target_flags_explicit = ptr->ix86_target_flags_explicit;
+ recip_mask_explicit = ptr->x_recip_mask_explicit;
/* Recreate the arch feature tests if the arch changed */
if (old_arch != ix86_arch)
===================================================================
@@ -605,7 +605,9 @@ Objective-C and Objective-C++ Dialects}.
-mno-wide-multiply -mrtd -malign-double @gol
-mpreferred-stack-boundary=@var{num} @gol
-mincoming-stack-boundary=@var{num} @gol
--mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip -mvzeroupper @gol
+-mcld -mcx16 -msahf -mmovbe -mcrc32 @gol
+-mrecip -mrecip=@var{opt} @gol
+-mvzeroupper @gol
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
-mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
-msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol
@@ -12871,6 +12873,22 @@ Note that GCC implements 1.0f/sqrtf(x) i
already with @option{-ffast-math} (or the above option combination), and
doesn't need @option{-mrecip}.
+@item -mrecip=@var{opt}
+@opindex mrecip=opt
+This option controls which reciprocal estimate instructions
+may be used.  @var{opt} is a comma-separated list of options, each of which
+may be preceded by a @code{!} to invert the option:
+@code{all}: enable all estimate instructions,
+@code{default}: enable the default instructions, equivalent to @option{-mrecip},
+@code{none}: disable all estimate instructions, equivalent to @option{-mno-recip},
+@code{div}: enable the approximation for scalar division,
+@code{vec-div}: enable the approximation for vectorized division,
+@code{sqrt}: enable the approximation for scalar square root,
+@code{vec-sqrt}: enable the approximation for vectorized square root.
+
+So, for example, @option{-mrecip=all,!sqrt} enables
+all of the reciprocal approximations except for scalar square root.
+
@item -mveclibabi=@var{type}
@opindex mveclibabi
Specifies the ABI type to use for vectorizing intrinsics using an