@@ -5694,11 +5694,20 @@ (define_insn "*trunc<mode>hf2"
(set_attr "prefix" "evex")
(set_attr "mode" "HF")])
+/* vcvtneps2bf16 doesn't honor sNaN: it quietly turns sNaN into qNaN,
+ and it always rounds to nearest-even.
+ flag_unsafe_math_optimizations is needed for psrld, which just truncates.
+ If we don't expect qNaNs nor sNaNs and can assume rounding
+ to nearest, we can expand the conversion inline as
+ (fromi + 0x7fff + ((fromi >> 16) & 1)) >> 16. */
(define_insn "truncsfbf2"
[(set (match_operand:BF 0 "register_operand" "=x,x,v,Yv")
(float_truncate:BF
(match_operand:SF 1 "register_operand" "0,x,v,Yv")))]
- "TARGET_SSE2 && flag_unsafe_math_optimizations && !HONOR_NANS (BFmode)"
+ "TARGET_SSE2 && !HONOR_NANS (BFmode) && !flag_rounding_math
+ && (flag_unsafe_math_optimizations
+ || TARGET_AVXNECONVERT
+ || (TARGET_AVX512BF16 && TARGET_AVX512VL))"
"@
psrld\t{$16, %0|%0, 16}
%{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}
@@ -2998,7 +2998,11 @@ (define_expand "truncv2sfv2bf2"
[(set (match_operand:V2BF 0 "register_operand")
(float_truncate:V2BF
(match_operand:V2SF 1 "nonimmediate_operand")))]
- "TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
+ "TARGET_SSSE3 && TARGET_MMX_WITH_SSE
+ && !HONOR_NANS (BFmode) && !flag_rounding_math
+ && (flag_unsafe_math_optimizations
+ || TARGET_AVXNECONVERT
+ || (TARGET_AVX512BF16 && TARGET_AVX512VL))"
{
rtx op1 = gen_reg_rtx (V4SFmode);
rtx op0 = gen_reg_rtx (V4BFmode);
@@ -3016,7 +3020,7 @@ (define_expand "extendv2bfv2sf2"
[(set (match_operand:V2SF 0 "register_operand")
(float_extend:V2SF
(match_operand:V2BF 1 "nonimmediate_operand")))]
- "TARGET_SSE2 && TARGET_MMX_WITH_SSE"
+ "TARGET_SSE2 && TARGET_MMX_WITH_SSE && !HONOR_NANS (BFmode)"
{
rtx op0 = gen_reg_rtx (V4SFmode);
rtx op1 = gen_reg_rtx (V4BFmode);
@@ -30995,7 +30995,10 @@ (define_expand "truncv4sfv4bf2"
[(set (match_operand:V4BF 0 "register_operand")
(float_truncate:V4BF
(match_operand:V4SF 1 "nonimmediate_operand")))]
- "TARGET_SSSE3"
+ "TARGET_SSSE3 && !HONOR_NANS (BFmode) && !flag_rounding_math
+ && (flag_unsafe_math_optimizations
+ || TARGET_AVXNECONVERT
+ || (TARGET_AVX512BF16 && TARGET_AVX512VL))"
{
if (!TARGET_AVXNECONVERT
&& !(TARGET_AVX512BF16 && TARGET_AVX512VL))
@@ -31088,7 +31091,10 @@ (define_expand "truncv8sfv8bf2"
[(set (match_operand:V8BF 0 "register_operand")
(float_truncate:V8BF
(match_operand:V8SF 1 "nonimmediate_operand")))]
- "TARGET_AVX2"
+ "TARGET_AVX2 && !HONOR_NANS (BFmode) && !flag_rounding_math
+ && (flag_unsafe_math_optimizations
+ || TARGET_AVXNECONVERT
+ || (TARGET_AVX512BF16 && TARGET_AVX512VL))"
{
if (!TARGET_AVXNECONVERT
&& !(TARGET_AVX512BF16 && TARGET_AVX512VL))
@@ -31114,7 +31120,9 @@ (define_expand "truncv16sfv16bf2"
[(set (match_operand:V16BF 0 "register_operand")
(float_truncate:V16BF
(match_operand:V16SF 1 "nonimmediate_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW && TARGET_EVEX512
+ && !HONOR_NANS (BFmode) && !flag_rounding_math
+ && (flag_unsafe_math_optimizations || TARGET_AVX512BF16)"
{
if (!TARGET_AVX512BF16)
{
@@ -31127,7 +31135,7 @@ (define_expand "extend<sf_cvt_bf16_lower><mode>2"
[(set (match_operand:VF1_AVX512BW 0 "register_operand")
(float_extend:VF1_AVX512BW
(match_operand:<sf_cvt_bf16> 1 "nonimmediate_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !HONOR_NANS (BFmode)"
{
ix86_expand_vector_bf2sf_with_vec_perm (operands[0], operands[1]);
DONE;
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mavx512vl -mavx512bf16 -O2" } */
+/* { dg-options "-mavx512vl -mavx512bf16 -O2 -ffast-math" } */
/* { dg-final { scan-assembler-times {(?n)vcvtneps2bf16} 6 } } */
#include "avx512bw-truncsfbf.c"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-options "-mavx512bw -mavx512vl -O2 -ffast-math" } */
/* { dg-final { scan-assembler-times {(?n)(?:vpermi2w|vpunpcklwd)} 6 } } */
typedef float v4sf __attribute__((vector_size(16)));
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mavx512bw -mavx512vl -mno-avx512bf16 -mno-avxneconvert -O2" } */
+/* { dg-options "-mavx512bw -mavx512vl -mno-avx512bf16 -mno-avxneconvert -O2 -ffast-math" } */
/* { dg-final { scan-assembler-times {(?n)(?:vpermw|vpshufb)} 6 } } */
typedef float v4sf __attribute__((vector_size(16)));
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-msse2 -O2" } */
+/* { dg-options "-msse2 -O2 -ffast-math" } */
/* { dg-final { scan-assembler-times {(?n)(?:vpermi2w|punpcklwd)} 2 { target { ! ia32 } } } } */
typedef float v2sf __attribute__((vector_size(8)));
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mssse3 -mno-avx512bf16 -mno-avxneconvert -O2" } */
+/* { dg-options "-mssse3 -mno-avx512bf16 -mno-avxneconvert -O2 -ffast-math" } */
/* { dg-final { scan-assembler-times {(?n)pshufb} 2 { target { ! ia32 } } } } */
typedef float v2sf __attribute__((vector_size(8)));