===================================================================
@@ -57,7 +57,8 @@
[(set (match_operand:VALL 0 "s_register_operand" "")
(plus:VALL (match_operand:VALL 1 "s_register_operand" "")
(match_operand:VALL 2 "s_register_operand" "")))]
- "TARGET_NEON
+ "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+ || flag_unsafe_math_optimizations))
|| (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
{
})
@@ -66,7 +67,8 @@
[(set (match_operand:VALL 0 "s_register_operand" "")
(minus:VALL (match_operand:VALL 1 "s_register_operand" "")
(match_operand:VALL 2 "s_register_operand" "")))]
- "TARGET_NEON
+ "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+ || flag_unsafe_math_optimizations))
|| (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
{
})
@@ -75,7 +77,9 @@
[(set (match_operand:VALLW 0 "s_register_operand" "")
(mult:VALLW (match_operand:VALLW 1 "s_register_operand" "")
(match_operand:VALLW 2 "s_register_operand" "")))]
- "TARGET_NEON || (<MODE>mode == V4HImode && TARGET_REALLY_IWMMXT)"
+ "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+ || flag_unsafe_math_optimizations))
+ || (<MODE>mode == V4HImode && TARGET_REALLY_IWMMXT)"
{
})
@@ -83,7 +87,8 @@
[(set (match_operand:VALLW 0 "s_register_operand" "")
(smin:VALLW (match_operand:VALLW 1 "s_register_operand" "")
(match_operand:VALLW 2 "s_register_operand" "")))]
- "TARGET_NEON
+ "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+ || flag_unsafe_math_optimizations))
|| (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
{
})
@@ -101,7 +106,8 @@
[(set (match_operand:VALLW 0 "s_register_operand" "")
(smax:VALLW (match_operand:VALLW 1 "s_register_operand" "")
(match_operand:VALLW 2 "s_register_operand" "")))]
- "TARGET_NEON
+ "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+ || flag_unsafe_math_optimizations))
|| (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
{
})
===================================================================
@@ -833,7 +833,7 @@
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
(match_operand:VDQ 2 "s_register_operand" "w")))]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
"vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
@@ -847,7 +847,7 @@
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
(match_operand:VDQ 2 "s_register_operand" "w")))]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
"vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
@@ -861,7 +861,7 @@
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
(match_operand:VDQ 2 "s_register_operand" "w")))]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
"vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
@@ -883,7 +883,7 @@
(plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
(match_operand:VDQ 3 "s_register_operand" "w"))
(match_operand:VDQ 1 "s_register_operand" "0")))]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
"vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
@@ -905,7 +905,7 @@
(minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0")
(mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
(match_operand:VDQ 3 "s_register_operand" "w"))))]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
"vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
@@ -1320,7 +1320,7 @@
(parallel [(const_int 0) (const_int 1)]))
(vec_select:V2SF (match_dup 1)
(parallel [(const_int 2) (const_int 3)]))))]
- "TARGET_NEON"
+ "TARGET_NEON && flag_unsafe_math_optimizations"
"<VQH_mnem>.f32\t%P0, %e1, %f1"
[(set_attr "vqh_mnem" "<VQH_mnem>")
(set (attr "neon_type")
@@ -1455,7 +1455,7 @@
(define_expand "reduc_splus_<mode>"
[(match_operand:VD 0 "s_register_operand" "")
(match_operand:VD 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
&gen_neon_vpadd_internal<mode>);
@@ -1465,7 +1465,7 @@
(define_expand "reduc_splus_<mode>"
[(match_operand:VQ 0 "s_register_operand" "")
(match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
@@ -1500,7 +1500,7 @@
(define_expand "reduc_smin_<mode>"
[(match_operand:VD 0 "s_register_operand" "")
(match_operand:VD 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
&gen_neon_vpsmin<mode>);
@@ -1510,7 +1510,7 @@
(define_expand "reduc_smin_<mode>"
[(match_operand:VQ 0 "s_register_operand" "")
(match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
@@ -1525,7 +1525,7 @@
(define_expand "reduc_smax_<mode>"
[(match_operand:VD 0 "s_register_operand" "")
(match_operand:VD 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
&gen_neon_vpsmax<mode>);
@@ -1535,7 +1535,7 @@
(define_expand "reduc_smax_<mode>"
[(match_operand:VQ 0 "s_register_operand" "")
(match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
===================================================================
@@ -9960,6 +9960,14 @@ of GCC@.
If @option{-msoft-float} is specified this specifies the format of
floating point values.
+If the selected floating-point hardware includes the NEON extension
+(e.g. @option{-mfpu}=@samp{neon}), note that NEON floating-point
+instructions are not generated by GCC's auto-vectorization pass unless
+@option{-funsafe-math-optimizations} is also specified. This is
+because NEON hardware does not fully implement the IEEE 754 standard for
+floating-point arithmetic (in particular denormal values are treated as
+zero), so the use of NEON instructions may lead to a loss of precision.
+
@item -mfp16-format=@var{name}
@opindex mfp16-format
Specify the format of the @code{__fp16} half-precision floating-point type.
===================================================================
@@ -105,6 +105,10 @@ if [istarget "powerpc-*paired*"] {
set dg-do-what-default run
} elseif [is-effective-target arm_neon_ok] {
eval lappend DEFAULT_VECTCFLAGS [add_options_for_arm_neon ""]
+ # NEON does not support denormals, so it is not used to vectorize float
+ # operations by default, to avoid loss of precision. We must pass
+ # -ffast-math to test vectorization of float operations.
+ lappend DEFAULT_VECTCFLAGS "-ffast-math"
if [is-effective-target arm_neon_hw] {
set dg-do-what-default run
} else {
===================================================================
@@ -49,5 +49,6 @@ int main (void)
}
/* need -ffast-math to vectorizer these loops. */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* ARM NEON targets run these tests with -ffast-math, so the scan below is expected to fail there. */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */