@@ -17721,9 +17721,9 @@ (define_expand "significand<mode>2"
(define_insn "sse4_1_round<mode>2"
- [(set (match_operand:MODEF 0 "register_operand" "=x,x,x,v,v")
- (unspec:MODEF
- [(match_operand:MODEF 1 "nonimmediate_operand" "0,x,m,v,m")
+ [(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
+ (unspec:MODEFH
+ [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,m,v,m")
(match_operand:SI 2 "const_0_to_15_operand" "n,n,n,n,n")]
UNSPEC_ROUND))]
"TARGET_SSE4_1"
@@ -17980,6 +17980,19 @@ (define_expand "<rounding_insn>xf2"
"TARGET_USE_FANCY_MATH_387
&& (flag_fp_int_builtin_inexact || !flag_trapping_math)")
+(define_expand "<rounding_insn>hf2" ; HFmode rounding expander; one instance per FRNDINT_ROUNDING entry.
+ [(parallel [(set (match_operand:HF 0 "register_operand")
+ (unspec:HF [(match_operand:HF 1 "register_operand")]
+ FRNDINT_ROUNDING))
+ (clobber (reg:CC FLAGS_REG))])] ; Template only fixes operand modes/predicates; the body ends in DONE, so it is never emitted.
+ "TARGET_AVX512FP16" ; Only enabled when AVX512-FP16 is available.
+{
+ emit_insn (gen_sse4_1_roundhf2 /* Presumably the HF instance of sse4_1_round<mode>2 (MODEFH) -- confirm.  */
+ (operands[0], operands[1],
+ GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC))); /* Per-instance rounding immediate with ROUND_NO_EXC OR'ed in.  */
+ DONE; /* Use only the insn emitted above instead of the RTL template.  */
+})
+
(define_expand "<rounding_insn><mode>2"
[(parallel [(set (match_operand:MODEF 0 "register_operand")
(unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
@@ -20202,14 +20202,14 @@ (define_insn "sse4_1_round<ssescalarmodesuffix>"
(set_attr "mode" "<MODE>")])
(define_insn "*sse4_1_round<ssescalarmodesuffix>"
- [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
- (vec_merge:VF_128
- (vec_duplicate:VF_128
+ [(set (match_operand:VFH_128 0 "register_operand" "=Yr,*x,x,v")
+ (vec_merge:VFH_128
+ (vec_duplicate:VFH_128
(unspec:<ssescalarmode>
[(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
(match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
UNSPEC_ROUND))
- (match_operand:VF_128 1 "register_operand" "0,0,x,v")
+ (match_operand:VFH_128 1 "register_operand" "0,0,x,v")
(const_int 1)))]
"TARGET_SSE4_1"
"@
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512fp16" } */
+
+_Float16
+f1 (_Float16 x) /* Truncation case for the vrndscalesh scans in dg-final.  */
+{
+ return __builtin_truncf16 (x); /* Presumably the $11 form (trunc | no-exc) -- confirm against ROUND_* values.  */
+}
+
+_Float16
+f2 (_Float16 x) /* Floor case for the vrndscalesh scans in dg-final.  */
+{
+ return __builtin_floorf16 (x); /* Presumably the $9 form (floor | no-exc) -- confirm against ROUND_* values.  */
+}
+
+_Float16
+f3 (_Float16 x) /* Ceiling case for the vrndscalesh scans in dg-final.  */
+{
+ return __builtin_ceilf16 (x); /* Presumably the $10 form (ceil | no-exc) -- confirm against ROUND_* values.  */
+}
+
+_Float16
+f4 (_Float16 x) /* Round-to-even case for the vrndscalesh scans in dg-final.  */
+{
+ return __builtin_roundevenf16 (x); /* Presumably the $8 form (nearest-even | no-exc) -- confirm against ROUND_* values.  */
+}
+
+/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$11\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$10\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$9\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$8\[^\n\r\]*xmm\[0-9\]" 1 } } */