@@ -3638,6 +3638,8 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode,
return false;
else if (vector_size == 64)
return true;
+ else if (GET_MODE_INNER (cmp_mode) == HFmode)
+ return true;
/* When op_true is NULL, op_false must be NULL, or vice versa. */
gcc_assert (!op_true == !op_false);
@@ -989,9 +989,9 @@ (define_mode_attr sseintvecmode2
(V16HF "OI") (V8HF "TI")])
(define_mode_attr sseintvecmodelower
- [(V16SF "v16si") (V8DF "v8di")
- (V8SF "v8si") (V4DF "v4di")
- (V4SF "v4si") (V2DF "v2di")
+ [(V32HF "v32hi") (V16SF "v16si") (V8DF "v8di")
+ (V16HF "v16hi") (V8SF "v8si") (V4DF "v4di")
+ (V8HF "v8hi") (V4SF "v4si") (V2DF "v2di")
(V8SI "v8si") (V4DI "v4di")
(V4SI "v4si") (V2DI "v2di")
(V16HI "v16hi") (V8HI "v8hi")
@@ -1568,9 +1568,9 @@ (define_insn "<avx512>_store<mode>_mask"
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx512>_store<mode>_mask"
- [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
- (vec_merge:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 1 "register_operand" "v")
+ [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
+ (vec_merge:VI12HF_AVX512VL
+ (match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
"TARGET_AVX512BW"
@@ -3810,8 +3810,8 @@ (define_insn "<sse>_<unord>comi<round_saeonly_name>"
(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(match_operator:<avx512fmaskmode> 1 ""
- [(match_operand:V48_AVX512VL 2 "register_operand")
- (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
+ [(match_operand:V48H_AVX512VL 2 "register_operand")
+ (match_operand:V48H_AVX512VL 3 "nonimmediate_operand")]))]
"TARGET_AVX512F"
{
bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
@@ -4018,6 +4018,51 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
DONE;
})
+(define_expand "vcond<mode><mode>" ; op0 = (op4 <op3> op5) ? op1 : op2, every operand in the same VF_AVX512FP16VL mode
+ [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") ; result
+ (if_then_else:VF_AVX512FP16VL
+ (match_operator 3 "" ; comparison; the empty predicate places no restriction on its code
+ [(match_operand:VF_AVX512FP16VL 4 "vector_operand") ; left-hand side of the comparison
+ (match_operand:VF_AVX512FP16VL 5 "vector_operand")]) ; right-hand side of the comparison
+ (match_operand:VF_AVX512FP16VL 1 "general_operand") ; "then" arm
+ (match_operand:VF_AVX512FP16VL 2 "general_operand")))] ; "else" arm
+ "TARGET_AVX512FP16"
+{
+ bool ok = ix86_expand_fp_vcond (operands); /* The whole expansion is delegated to this helper.  */
+ gcc_assert (ok); /* A false return is treated as an internal error.  */
+ DONE;
+})
+
+(define_expand "vcond<mode><sseintvecmodelower>" ; VF_AVX512FP16VL select driven by a comparison in <sseintvecmode>
+ [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") ; result
+ (if_then_else:VF_AVX512FP16VL
+ (match_operator 3 "" ; comparison; the empty predicate places no restriction on its code
+ [(match_operand:<sseintvecmode> 4 "vector_operand") ; compared lhs; presumably the same-sized integer vector mode (cf. sseintvecmodelower: V8HF -> v8hi)
+ (match_operand:<sseintvecmode> 5 "vector_operand")]) ; compared rhs, same mode as operand 4
+ (match_operand:VF_AVX512FP16VL 1 "general_operand") ; "then" arm
+ (match_operand:VF_AVX512FP16VL 2 "general_operand")))] ; "else" arm
+ "TARGET_AVX512FP16"
+{
+ bool ok = ix86_expand_int_vcond (operands); /* Integer-compare helper, since operands 4 and 5 are not in the FP mode.  */
+ gcc_assert (ok); /* A false return is treated as an internal error.  */
+ DONE;
+})
+
+(define_expand "vcond<sseintvecmodelower><mode>" ; <sseintvecmode> select driven by a comparison in VF_AVX512FP16VL
+ [(set (match_operand:<sseintvecmode> 0 "register_operand") ; result, in the mode paired with the compared mode
+ (if_then_else:<sseintvecmode>
+ (match_operator 3 "" ; comparison; the empty predicate places no restriction on its code
+ [(match_operand:VF_AVX512FP16VL 4 "vector_operand") ; compared lhs
+ (match_operand:VF_AVX512FP16VL 5 "vector_operand")]) ; compared rhs
+ (match_operand:<sseintvecmode> 1 "general_operand") ; "then" arm
+ (match_operand:<sseintvecmode> 2 "general_operand")))] ; "else" arm
+ "TARGET_AVX512FP16"
+{
+ bool ok = ix86_expand_fp_vcond (operands); /* FP-compare helper, since operands 4 and 5 are in the FP mode.  */
+ gcc_assert (ok); /* A false return is treated as an internal error.  */
+ DONE;
+})
+
(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
[(set (match_operand:V48_AVX512VL 0 "register_operand")
(vec_merge:V48_AVX512VL
@@ -4027,10 +4072,10 @@ (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
"TARGET_AVX512F")
(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand")
- (vec_merge:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
+ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand") ; result; iterator widened from VI12_AVX512VL to VI12HF_AVX512VL
+ (vec_merge:VI12HF_AVX512VL ; no preparation code follows: the template below is emitted as written
+ (match_operand:VI12HF_AVX512VL 1 "nonimmediate_operand") ; first vec_merge source
+ (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand") ; second vec_merge source
 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
 "TARGET_AVX512BW")
@@ -15538,6 +15583,21 @@ (define_expand "vcondu<VI8F_128:mode>v2di"
DONE;
})
+(define_expand "vcondu<mode><sseintvecmodelower>" ; vcondu counterpart of vcond<mode><sseintvecmodelower>; vcondu is the standard name used for unsigned comparisons
+ [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") ; result
+ (if_then_else:VF_AVX512FP16VL
+ (match_operator 3 "" ; comparison; the empty predicate places no restriction on its code
+ [(match_operand:<sseintvecmode> 4 "vector_operand") ; compared lhs; presumably the same-sized integer vector mode (cf. sseintvecmodelower: V8HF -> v8hi)
+ (match_operand:<sseintvecmode> 5 "vector_operand")]) ; compared rhs, same mode as operand 4
+ (match_operand:VF_AVX512FP16VL 1 "general_operand") ; "then" arm
+ (match_operand:VF_AVX512FP16VL 2 "general_operand")))] ; "else" arm
+ "TARGET_AVX512FP16"
+{
+ bool ok = ix86_expand_int_vcond (operands); /* Same helper as the signed integer-compare expander.  */
+ gcc_assert (ok); /* A false return is treated as an internal error.  */
+ DONE;
+})
+
(define_expand "vcondeq<VI8F_128:mode>v2di"
[(set (match_operand:VI8F_128 0 "register_operand")
(if_then_else:VI8F_128
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vminph" 3 } } */
+/* { dg-final { scan-assembler-times "vmaxph" 3 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+
+#define VCONDMINMAX(size, op, name) /* Defines vminmax_v<size>hfv<size>hf<name> on v<size>hf vectors.  */ \
+v##size##hf \
+__attribute__ ((noinline, noclone)) /* Keep every instance as its own out-of-line function.  */ \
+vminmax_##v##size##hf##v##size##hf##name (v##size##hf a, v##size##hf b) \
+{ \
+ return (a op b) ? a : b; /* Select one of the compared operands; dg-final expects vminph/vmaxph for this.  */ \
+}
+
+VCONDMINMAX (8, <, min)
+VCONDMINMAX (8, >, max)
+VCONDMINMAX (16, <, min)
+VCONDMINMAX (16, >, max)
+VCONDMINMAX (32, <, min)
+VCONDMINMAX (32, >, max)
+
new file mode 100644
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 45 } } */
+/* { dg-final { scan-assembler-times "vpcmpuw" 12 } } */
+/* { dg-final { scan-assembler-times "vpcmpw" 18 } } */
+/* { dg-final { scan-assembler-times "(?:vpblendmw|vmovdqu16\[^\{\n\]+\{%k\[1-7\]\})" 75 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef unsigned short v8uhi __attribute__ ((vector_size (16)));
+typedef unsigned short v16uhi __attribute__ ((vector_size (32)));
+typedef unsigned short v32uhi __attribute__ ((vector_size (64)));
+
+#define VCONDMOV(size, op, name) /* Defines five vcond_v<size><cmp>v<size><data><name> functions, one per compare/data type pairing.  */ \
+v##size##hf /* 1: _Float16 compare selects _Float16 data.  */ \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##hf##name (v##size##hf a, v##size##hf b, \
+ v##size##hf c, v##size##hf d) \
+{ \
+ return (a op b) ? c : d; \
+}\
+v##size##hf /* 2: signed short compare selects _Float16 data.  */ \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hi##v##size##hf##name (v##size##hi a, v##size##hi b, \
+ v##size##hf c, v##size##hf d) \
+{ \
+ return (a op b) ? c : d; \
+}\
+v##size##hi /* 3: _Float16 compare (on c, d) selects signed short data (a, b).  */ \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##hi##name (v##size##hi a, v##size##hi b, \
+ v##size##hf c, v##size##hf d) \
+{ \
+ return (c op d) ? a : b; \
+} \
+v##size##hf /* 4: unsigned short compare selects _Float16 data.  */ \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##uhi##v##size##hf##name (v##size##uhi a, v##size##uhi b, \
+ v##size##hf c, v##size##hf d) \
+{ \
+ return (a op b) ? c : d; \
+}\
+v##size##uhi /* 5: _Float16 compare (on c, d) selects unsigned short data (a, b).  */ \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##uhi##name (v##size##uhi a, v##size##uhi b, \
+ v##size##hf c, v##size##hf d) \
+{ \
+ return (c op d) ? a : b; \
+} \
+
+VCONDMOV (8, <, lt)
+VCONDMOV (8, >, gt)
+VCONDMOV (8, ==, eq)
+VCONDMOV (8, <=, le)
+VCONDMOV (8, >=, ge)
+VCONDMOV (16, <, lt)
+VCONDMOV (16, >, gt)
+VCONDMOV (16, <=, le)
+VCONDMOV (16, >=, ge)
+VCONDMOV (16, ==, eq)
+VCONDMOV (32, <, lt)
+VCONDMOV (32, >, gt)
+VCONDMOV (32, <=, le)
+VCONDMOV (32, >=, ge)
+VCONDMOV (32, ==, eq)
new file mode 100644
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=512" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 27 } } */
+/* { dg-final { scan-assembler-times "(?:vpcmpw|vpcmpeqw)" 12 } } */
+/* { dg-final { scan-assembler-times "vpcmpuw" 6 } } */
+
+typedef unsigned short u16;
+typedef short s16;
+
+#define CONDMOV_LOOP(size, type, ptype, op, name) /* Defines loop_cond_<size><ptype><type><name>.  */ \
+void \
+__attribute__ ((noinline, noclone, optimize("tree-vectorize"))) /* Request vectorization for this function specifically.  */ \
+loop_cond_##size##ptype##type##name ( \
+ ptype * restrict a, ptype * restrict b, /* Compared inputs, element type ptype.  */ \
+ type * restrict c, type * restrict d) /* Source and destination of the conditional copy, element type type.  */ \
+{ \
+ int i; \
+ for (i = 0; i < size; i++) \
+ { \
+ if (a[i] op b[i]) /* d[i] keeps its old value when the comparison is false.  */ \
+ d[i] = c[i]; \
+ } \
+}
+
+CONDMOV_LOOP (32, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (32, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (32, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (16, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (16, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (16, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (8, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (8, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (8, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (32, _Float16, s16, <, lt)
+CONDMOV_LOOP (32, _Float16, s16, >, gt)
+CONDMOV_LOOP (32, _Float16, s16, ==, eq)
+CONDMOV_LOOP (16, _Float16, s16, <, lt)
+CONDMOV_LOOP (16, _Float16, s16, >, gt)
+CONDMOV_LOOP (16, _Float16, s16, ==, eq)
+CONDMOV_LOOP (8, _Float16, s16, <, lt)
+CONDMOV_LOOP (8, _Float16, s16, >, gt)
+CONDMOV_LOOP (8, _Float16, s16, ==, eq)
+CONDMOV_LOOP (32, s16, _Float16, <, lt)
+CONDMOV_LOOP (32, s16, _Float16, >, gt)
+CONDMOV_LOOP (32, s16, _Float16, ==, eq)
+CONDMOV_LOOP (16, s16, _Float16, <, lt)
+CONDMOV_LOOP (16, s16, _Float16, >, gt)
+CONDMOV_LOOP (16, s16, _Float16, ==, eq)
+CONDMOV_LOOP (8, s16, _Float16, <, lt)
+CONDMOV_LOOP (8, s16, _Float16, >, gt)
+CONDMOV_LOOP (8, s16, _Float16, ==, eq)
+CONDMOV_LOOP (32, _Float16, u16, <, lt)
+CONDMOV_LOOP (32, _Float16, u16, >, gt)
+CONDMOV_LOOP (32, _Float16, u16, ==, eq)
+CONDMOV_LOOP (16, _Float16, u16, <, lt)
+CONDMOV_LOOP (16, _Float16, u16, >, gt)
+CONDMOV_LOOP (16, _Float16, u16, ==, eq)
+CONDMOV_LOOP (8, _Float16, u16, <, lt)
+CONDMOV_LOOP (8, _Float16, u16, >, gt)
+CONDMOV_LOOP (8, _Float16, u16, ==, eq)
+CONDMOV_LOOP (32, u16, _Float16, <, lt)
+CONDMOV_LOOP (32, u16, _Float16, >, gt)
+CONDMOV_LOOP (32, u16, _Float16, ==, eq)
+CONDMOV_LOOP (16, u16, _Float16, <, lt)
+CONDMOV_LOOP (16, u16, _Float16, >, gt)
+CONDMOV_LOOP (16, u16, _Float16, ==, eq)
+CONDMOV_LOOP (8, u16, _Float16, <, lt)
+CONDMOV_LOOP (8, u16, _Float16, >, gt)
+CONDMOV_LOOP (8, u16, _Float16, ==, eq)
new file mode 100644
@@ -0,0 +1,143 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=512" } */
+
+static void condmov_test (void);
+#define DO_TEST condmov_test
+#define AVX512FP16
+#define AVX512VL
+#include "avx512f-check.h"
+#include "avx512fp16-vcondmn-loop-1.c"
+
+_Float16 a[32], b[32], c[32], fexp[32], fref[32];
+s16 sa[32], sb[32], sc[32], sexp[32], sref[32];
+u16 ua[32], ub[32], uc[32], uexp[32], uref[32];
+
+#define EMULATE_CONDMOV_LOOP(size, type, ptype, op, name) /* Reference for loop_cond_<size><ptype><type><name>.  */ \
+void \
+__attribute__ ((noinline, noclone, optimize ("no-tree-vectorize"))) /* Must stay scalar to be an independent oracle.  */ \
+scalar_cond_##size##ptype##type##name ( \
+  ptype * restrict a, ptype * restrict b, /* Compared inputs.  */ \
+  type * restrict c, type * restrict d) /* Source and destination of the copy.  */ \
+{ \
+  int i; \
+  for (i = 0; i < size; i++) \
+    { \
+      if (a[i] op b[i]) /* d[i] is left untouched otherwise.  */ \
+	d[i] = c[i]; \
+    } \
+}
+
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, ==, eq)
+
+/* Reset data; b cycles through a, a + 1, a - 1 to give mixed masks.  */
+void init()
+{
+  for (int i = 0; i < 32; i++)
+    {
+      ua[i] = sa[i] = a[i] = i;
+      ub[i] = sb[i] = b[i] = i + (i + 1) % 3 - 1;  /* Never negative.  */
+      uc[i] = sc[i] = c[i] = (32 - i) * 2;  /* Payload, never -1.  */
+      uexp[i] = sexp[i] = fexp[i] = -1;  /* Sentinel for unwritten lanes.  */
+      uref[i] = sref[i] = fref[i] = -1;
+    }
+}
+
+/* Return 1 when the first SIZE 16-bit elements at A and B are bitwise
+   equal (or SIZE <= 0), else 0.  Bytes are compared directly: reading
+   _Float16 storage through a u16 lvalue breaks strict aliasing at -O2.  */
+int check_cond(void *a, void *b, int size)
+{
+  if (size <= 0)
+    return 1;
+  return __builtin_memcmp (a, b, size * sizeof (u16)) == 0;
+}
+
+#define TEST_CONDMOV_LOOP(size, name) /* Runs scalar_cond_* and loop_cond_* on identical inputs and aborts on any difference.  */ \
+{ \
+ init (); /* Case 1: _Float16 compare, _Float16 data.  */ \
+ scalar_cond_##size##_Float16_Float16##name (a, b, c, fexp); \
+ loop_cond_##size##_Float16_Float16##name (a, b, c, fref); \
+ if (!check_cond ((void *)fexp, (void *)fref, size)) \
+ abort(); \
+ \
+ init (); /* Case 2: _Float16 compare, s16 data.  */ \
+ scalar_cond_##size##_Float16s16##name (a, b, sc, sexp); \
+ loop_cond_##size##_Float16s16##name (a, b, sc, sref); \
+ if (!check_cond ((void *)sexp, (void *)sref, size)) \
+ abort(); \
+ \
+ init (); /* Case 3: s16 compare, _Float16 data.  */ \
+ scalar_cond_##size##s16_Float16##name (sa, sb, c, fexp); \
+ loop_cond_##size##s16_Float16##name (sa, sb, c, fref); \
+ if (!check_cond ((void *)fexp, (void *)fref, size)) \
+ abort(); \
+ \
+ init (); /* Case 4: _Float16 compare, u16 data.  */ \
+ scalar_cond_##size##_Float16u16##name (a, b, uc, uexp); \
+ loop_cond_##size##_Float16u16##name (a, b, uc, uref); \
+ if (!check_cond ((void *)uexp, (void *)uref, size)) \
+ abort(); \
+ \
+ init (); /* Case 5: u16 compare, _Float16 data.  */ \
+ scalar_cond_##size##u16_Float16##name (ua, ub, c, fexp); \
+ loop_cond_##size##u16_Float16##name (ua, ub, c, fref); \
+ if (!check_cond ((void *)fexp, (void *)fref, size)) \
+ abort(); \
+}
+
+static void condmov_test() /* Entry point named by DO_TEST; covers sizes 32, 16 and 8 with lt, gt and eq.  */
+{
+ TEST_CONDMOV_LOOP (32, lt)
+ TEST_CONDMOV_LOOP (32, gt)
+ TEST_CONDMOV_LOOP (32, eq)
+ TEST_CONDMOV_LOOP (16, lt)
+ TEST_CONDMOV_LOOP (16, gt)
+ TEST_CONDMOV_LOOP (16, eq)
+ TEST_CONDMOV_LOOP (8, lt)
+ TEST_CONDMOV_LOOP (8, gt)
+ TEST_CONDMOV_LOOP (8, eq)
+}
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 15 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+
+#define VCMPMN(type, op, name) /* Defines vec_cmp_<type><type><name>.  */ \
+type \
+__attribute__ ((noinline, noclone)) /* Keep every instance as its own out-of-line function.  */ \
+vec_cmp_##type##type##name (type a, type b) \
+{ \
+ return a op b; /* Bare vector comparison; dg-final expects one vcmpph per instantiation (15 in total).  */ \
+}
+
+VCMPMN (v8hf, <, lt)
+VCMPMN (v16hf, <, lt)
+VCMPMN (v32hf, <, lt)
+VCMPMN (v8hf, <=, le)
+VCMPMN (v16hf, <=, le)
+VCMPMN (v32hf, <=, le)
+VCMPMN (v8hf, >, gt)
+VCMPMN (v16hf, >, gt)
+VCMPMN (v32hf, >, gt)
+VCMPMN (v8hf, >=, ge)
+VCMPMN (v16hf, >=, ge)
+VCMPMN (v32hf, >=, ge)
+VCMPMN (v8hf, ==, eq)
+VCMPMN (v16hf, ==, eq)
+VCMPMN (v32hf, ==, eq)
From: Hongyu Wang <hongyu.wang@intel.com>

gcc/ChangeLog:

	* config/i386/i386-expand.c (ix86_use_mask_cmp_p): Enable
	HFmode mask_cmp.
	* config/i386/sse.md (sseintvecmodelower): Add HF vector modes.
	(<avx512>_store<mode>_mask): Extend to support HF vector modes.
	(vec_cmp<mode><avx512fmaskmodelower>): Likewise.
	(vcond_mask_<mode><avx512fmaskmodelower>): Likewise.
	(vcond<mode><mode>): New expander.
	(vcond<mode><sseintvecmodelower>): Likewise.
	(vcond<sseintvecmodelower><mode>): Likewise.
	(vcondu<mode><sseintvecmodelower>): Likewise.

gcc/testsuite/ChangeLog:

	* g++.target/i386/avx512fp16-vcondmn-vec.C: New test.
	* g++.target/i386/avx512fp16-vcondmn-minmax.C: Ditto.
	* gcc.target/i386/avx512fp16-vcondmn-loop-1.c: Ditto.
	* gcc.target/i386/avx512fp16-vcondmn-loop-2.c: Ditto.
	* gcc.target/i386/avx512fp16-vec_cmpmn.c: Ditto.
---
 gcc/config/i386/i386-expand.c                 |   2 +
 gcc/config/i386/sse.md                        |  84 ++++++++--
 .../i386/avx512fp16-vcondmn-minmax.C          |  25 +++
 .../g++.target/i386/avx512fp16-vcondmn-vec.C  |  70 +++++++++
 .../i386/avx512fp16-vcondmn-loop-1.c          |  70 +++++++++
 .../i386/avx512fp16-vcondmn-loop-2.c          | 143 ++++++++++++++++++
 .../gcc.target/i386/avx512fp16-vec_cmpmn.c    |  32 ++++
 7 files changed, 414 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C
 create mode 100644 gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c