[7/7] AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes.

Message ID	20210923054640.1406227-8-hongtao.liu@intel.com
State	New
Headers	show Return-Path: <gcc-patches-bounces+incoming=patchwork.ozlabs.org@gcc.gnu.org> DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 308873857C48 To: gcc-patches@gcc.gnu.org Subject: [PATCH 7/7] AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes. Date: Thu, 23 Sep 2021 13:46:40 +0800 Message-Id: <20210923054640.1406227-8-hongtao.liu@intel.com> In-Reply-To: <20210923054640.1406227-1-hongtao.liu@intel.com> References: <20210923054640.1406227-1-hongtao.liu@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list From: liuhongt via Gcc-patches <gcc-patches@gcc.gnu.org> Reply-To: liuhongt <hongtao.liu@intel.com> Cc: Hongyu Wang <hongyu.wang@intel.com> Errors-To: gcc-patches-bounces+incoming=patchwork.ozlabs.org@gcc.gnu.org Sender: "Gcc-patches" <gcc-patches-bounces+incoming=patchwork.ozlabs.org@gcc.gnu.org>
Series	AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes — [0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes | [1/7] AVX512FP16: Add expander for rint/nearbyinthf2. | [2/7] AVX512FP16: Add expander for fmahf4 | [3/7] AVX512FP16: Add expander for smin/maxhf3. | [4/7] AVX512FP16: Add fix(uns)?_truncmn2 for HF scalar and vector modes | [5/7] AVX512FP16: Add float(uns)?mn2 expander | [6/7] AVX512FP16: add truncmn2/extendmn2 expanders | [7/7] AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes.

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index dbbf5e34656..94ac303585e 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -3638,6 +3638,8 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode, return false; else if (vector_size == 64) return true; + else if (GET_MODE_INNER (cmp_mode) == HFmode) + return true; /* When op_true is NULL, op_false must be NULL, or vice versa. */ gcc_assert (!op_true == !op_false); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a48c8e8bede..084fc7f4693 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -989,9 +989,9 @@ (define_mode_attr sseintvecmode2 (V16HF "OI") (V8HF "TI")]) (define_mode_attr sseintvecmodelower - [(V16SF "v16si") (V8DF "v8di") - (V8SF "v8si") (V4DF "v4di") - (V4SF "v4si") (V2DF "v2di") + [(V32HF "v32hi") (V16SF "v16si") (V8DF "v8di") + (V16HF "v16hi") (V8SF "v8si") (V4DF "v4di") + (V8HF "v8hi") (V4SF "v4si") (V2DF "v2di") (V8SI "v8si") (V4DI "v4di") (V4SI "v4si") (V2DI "v2di") (V16HI "v16hi") (V8HI "v8hi") @@ -1568,9 +1568,9 @@ (define_insn "<avx512>_store<mode>_mask" (set_attr "mode" "<sseinsnmode>")]) (define_insn "<avx512>_store<mode>_mask" - [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m") - (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 1 "register_operand" "v") + [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m") + (vec_merge:VI12HF_AVX512VL + (match_operand:VI12HF_AVX512VL 1 "register_operand" "v") (match_dup 0) (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))] "TARGET_AVX512BW" @@ -3810,8 +3810,8 @@ (define_insn "<sse>_<unord>comi<round_saeonly_name>" (define_expand "vec_cmp<mode><avx512fmaskmodelower>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") (match_operator:<avx512fmaskmode> 1 "" - [(match_operand:V48_AVX512VL 2 "register_operand") - (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))] + [(match_operand:V48H_AVX512VL 2 
"register_operand") + (match_operand:V48H_AVX512VL 3 "nonimmediate_operand")]))] "TARGET_AVX512F" { bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]), @@ -4018,6 +4018,51 @@ (define_expand "vcond<V_128:mode><VF_128:mode>" DONE; }) +(define_expand "vcond<mode><mode>" + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") + (if_then_else:VF_AVX512FP16VL + (match_operator 3 "" + [(match_operand:VF_AVX512FP16VL 4 "vector_operand") + (match_operand:VF_AVX512FP16VL 5 "vector_operand")]) + (match_operand:VF_AVX512FP16VL 1 "general_operand") + (match_operand:VF_AVX512FP16VL 2 "general_operand")))] + "TARGET_AVX512FP16" +{ + bool ok = ix86_expand_fp_vcond (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vcond<mode><sseintvecmodelower>" + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") + (if_then_else:VF_AVX512FP16VL + (match_operator 3 "" + [(match_operand:<sseintvecmode> 4 "vector_operand") + (match_operand:<sseintvecmode> 5 "vector_operand")]) + (match_operand:VF_AVX512FP16VL 1 "general_operand") + (match_operand:VF_AVX512FP16VL 2 "general_operand")))] + "TARGET_AVX512FP16" +{ + bool ok = ix86_expand_int_vcond (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vcond<sseintvecmodelower><mode>" + [(set (match_operand:<sseintvecmode> 0 "register_operand") + (if_then_else:<sseintvecmode> + (match_operator 3 "" + [(match_operand:VF_AVX512FP16VL 4 "vector_operand") + (match_operand:VF_AVX512FP16VL 5 "vector_operand")]) + (match_operand:<sseintvecmode> 1 "general_operand") + (match_operand:<sseintvecmode> 2 "general_operand")))] + "TARGET_AVX512FP16" +{ + bool ok = ix86_expand_fp_vcond (operands); + gcc_assert (ok); + DONE; +}) + (define_expand "vcond_mask_<mode><avx512fmaskmodelower>" [(set (match_operand:V48_AVX512VL 0 "register_operand") (vec_merge:V48_AVX512VL @@ -4027,10 +4072,10 @@ (define_expand "vcond_mask_<mode><avx512fmaskmodelower>" "TARGET_AVX512F") (define_expand "vcond_mask_<mode><avx512fmaskmodelower>" 
- [(set (match_operand:VI12_AVX512VL 0 "register_operand") - (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 1 "nonimmediate_operand") - (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand") + [(set (match_operand:VI12HF_AVX512VL 0 "register_operand") + (vec_merge:VI12HF_AVX512VL + (match_operand:VI12HF_AVX512VL 1 "nonimmediate_operand") + (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand") (match_operand:<avx512fmaskmode> 3 "register_operand")))] "TARGET_AVX512BW") @@ -15538,6 +15583,21 @@ (define_expand "vcondu<VI8F_128:mode>v2di" DONE; }) +(define_expand "vcondu<mode><sseintvecmodelower>" + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") + (if_then_else:VF_AVX512FP16VL + (match_operator 3 "" + [(match_operand:<sseintvecmode> 4 "vector_operand") + (match_operand:<sseintvecmode> 5 "vector_operand")]) + (match_operand:VF_AVX512FP16VL 1 "general_operand") + (match_operand:VF_AVX512FP16VL 2 "general_operand")))] + "TARGET_AVX512FP16" +{ + bool ok = ix86_expand_int_vcond (operands); + gcc_assert (ok); + DONE; +}) + (define_expand "vcondeq<VI8F_128:mode>v2di" [(set (match_operand:VI8F_128 0 "register_operand") (if_then_else:VI8F_128 diff --git a/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C new file mode 100644 index 00000000000..6d50f4974c5 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ + +/* { dg-final { scan-assembler-times "vminph" 3 } } */ +/* { dg-final { scan-assembler-times "vmaxph" 3 } } */ + +typedef _Float16 v8hf __attribute__ ((vector_size (16))); +typedef _Float16 v16hf __attribute__ ((vector_size (32))); +typedef _Float16 v32hf __attribute__ ((vector_size (64))); + +#define VCONDMINMAX(size, op, name) \ +v##size##hf \ +__attribute__ ((noinline, noclone)) \ +vminmax_##v##size##hf##v##size##hf##name (v##size##hf a, v##size##hf b) \ +{ \ + 
return (a op b) ? a : b; \ +} + +VCONDMINMAX (8, <, min) +VCONDMINMAX (8, >, max) +VCONDMINMAX (16, <, min) +VCONDMINMAX (16, >, max) +VCONDMINMAX (32, <, min) +VCONDMINMAX (32, >, max) + diff --git a/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C new file mode 100644 index 00000000000..de93e2c5c86 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ + +/* { dg-final { scan-assembler-times "vcmpph" 45 } } */ +/* { dg-final { scan-assembler-times "vpcmpuw" 12 } } */ +/* { dg-final { scan-assembler-times "vpcmpw" 18 } } */ +/* { dg-final { scan-assembler-times "(?:vpblendmw|vmovdqu16\[^\{\n\]+\{%k\[1-7\]\})" 75 } } */ + +typedef _Float16 v8hf __attribute__ ((vector_size (16))); +typedef _Float16 v16hf __attribute__ ((vector_size (32))); +typedef _Float16 v32hf __attribute__ ((vector_size (64))); +typedef short v8hi __attribute__ ((vector_size (16))); +typedef short v16hi __attribute__ ((vector_size (32))); +typedef short v32hi __attribute__ ((vector_size (64))); +typedef unsigned short v8uhi __attribute__ ((vector_size (16))); +typedef unsigned short v16uhi __attribute__ ((vector_size (32))); +typedef unsigned short v32uhi __attribute__ ((vector_size (64))); + +#define VCONDMOV(size, op, name) \ +v##size##hf \ +__attribute__ ((noinline, noclone)) \ +vcond_##v##size##hf##v##size##hf##name (v##size##hf a, v##size##hf b, \ + v##size##hf c, v##size##hf d) \ +{ \ + return (a op b) ? c : d; \ +}\ +v##size##hf \ +__attribute__ ((noinline, noclone)) \ +vcond_##v##size##hi##v##size##hf##name (v##size##hi a, v##size##hi b, \ + v##size##hf c, v##size##hf d) \ +{ \ + return (a op b) ? c : d; \ +}\ +v##size##hi \ +__attribute__ ((noinline, noclone)) \ +vcond_##v##size##hf##v##size##hi##name (v##size##hi a, v##size##hi b, \ + v##size##hf c, v##size##hf d) \ +{ \ + return (c op d) ? 
a : b; \ +} \ +v##size##hf \ +__attribute__ ((noinline, noclone)) \ +vcond_##v##size##uhi##v##size##hf##name (v##size##uhi a, v##size##uhi b, \ + v##size##hf c, v##size##hf d) \ +{ \ + return (a op b) ? c : d; \ +}\ +v##size##uhi \ +__attribute__ ((noinline, noclone)) \ +vcond_##v##size##hf##v##size##uhi##name (v##size##uhi a, v##size##uhi b, \ + v##size##hf c, v##size##hf d) \ +{ \ + return (c op d) ? a : b; \ +} \ + +VCONDMOV (8, <, lt) +VCONDMOV (8, >, gt) +VCONDMOV (8, ==, eq) +VCONDMOV (8, <=, le) +VCONDMOV (8, >=, ge) +VCONDMOV (16, <, lt) +VCONDMOV (16, >, gt) +VCONDMOV (16, <=, le) +VCONDMOV (16, >=, ge) +VCONDMOV (16, ==, eq) +VCONDMOV (32, <, lt) +VCONDMOV (32, >, gt) +VCONDMOV (32, <=, le) +VCONDMOV (32, >=, ge) +VCONDMOV (32, ==, eq) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c new file mode 100644 index 00000000000..e8745aba64e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=512" } */ + +/* { dg-final { scan-assembler-times "vcmpph" 27 } } */ +/* { dg-final { scan-assembler-times "(?:vpcmpw|vpcmpeqw)" 12 } } */ +/* { dg-final { scan-assembler-times "vpcmpuw" 6 } } */ + +typedef unsigned short u16; +typedef short s16; + +#define CONDMOV_LOOP(size, type, ptype, op, name) \ +void \ +__attribute__ ((noinline, noclone, optimize("tree-vectorize"))) \ +loop_cond_##size##ptype##type##name ( \ + ptype * restrict a, ptype * restrict b, \ + type * restrict c, type * restrict d) \ +{ \ + int i; \ + for (i = 0; i < size; i++) \ + { \ + if (a[i] op b[i]) \ + d[i] = c[i]; \ + } \ +} + +CONDMOV_LOOP (32, _Float16, _Float16, <, lt) +CONDMOV_LOOP (32, _Float16, _Float16, >, gt) +CONDMOV_LOOP (32, _Float16, _Float16, ==, eq) +CONDMOV_LOOP (16, _Float16, _Float16, <, lt) +CONDMOV_LOOP (16, _Float16, _Float16, >, gt) +CONDMOV_LOOP (16, _Float16, 
_Float16, ==, eq) +CONDMOV_LOOP (8, _Float16, _Float16, <, lt) +CONDMOV_LOOP (8, _Float16, _Float16, >, gt) +CONDMOV_LOOP (8, _Float16, _Float16, ==, eq) +CONDMOV_LOOP (32, _Float16, s16, <, lt) +CONDMOV_LOOP (32, _Float16, s16, >, gt) +CONDMOV_LOOP (32, _Float16, s16, ==, eq) +CONDMOV_LOOP (16, _Float16, s16, <, lt) +CONDMOV_LOOP (16, _Float16, s16, >, gt) +CONDMOV_LOOP (16, _Float16, s16, ==, eq) +CONDMOV_LOOP (8, _Float16, s16, <, lt) +CONDMOV_LOOP (8, _Float16, s16, >, gt) +CONDMOV_LOOP (8, _Float16, s16, ==, eq) +CONDMOV_LOOP (32, s16, _Float16, <, lt) +CONDMOV_LOOP (32, s16, _Float16, >, gt) +CONDMOV_LOOP (32, s16, _Float16, ==, eq) +CONDMOV_LOOP (16, s16, _Float16, <, lt) +CONDMOV_LOOP (16, s16, _Float16, >, gt) +CONDMOV_LOOP (16, s16, _Float16, ==, eq) +CONDMOV_LOOP (8, s16, _Float16, <, lt) +CONDMOV_LOOP (8, s16, _Float16, >, gt) +CONDMOV_LOOP (8, s16, _Float16, ==, eq) +CONDMOV_LOOP (32, _Float16, u16, <, lt) +CONDMOV_LOOP (32, _Float16, u16, >, gt) +CONDMOV_LOOP (32, _Float16, u16, ==, eq) +CONDMOV_LOOP (16, _Float16, u16, <, lt) +CONDMOV_LOOP (16, _Float16, u16, >, gt) +CONDMOV_LOOP (16, _Float16, u16, ==, eq) +CONDMOV_LOOP (8, _Float16, u16, <, lt) +CONDMOV_LOOP (8, _Float16, u16, >, gt) +CONDMOV_LOOP (8, _Float16, u16, ==, eq) +CONDMOV_LOOP (32, u16, _Float16, <, lt) +CONDMOV_LOOP (32, u16, _Float16, >, gt) +CONDMOV_LOOP (32, u16, _Float16, ==, eq) +CONDMOV_LOOP (16, u16, _Float16, <, lt) +CONDMOV_LOOP (16, u16, _Float16, >, gt) +CONDMOV_LOOP (16, u16, _Float16, ==, eq) +CONDMOV_LOOP (8, u16, _Float16, <, lt) +CONDMOV_LOOP (8, u16, _Float16, >, gt) +CONDMOV_LOOP (8, u16, _Float16, ==, eq) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c new file mode 100644 index 00000000000..a0d5f988088 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c @@ -0,0 +1,143 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl 
-mprefer-vector-width=512" } */ + +static void condmov_test (void); +#define DO_TEST condmov_test +#define AVX512FP16 +#define AVX512VL +#include "avx512f-check.h" +#include "avx512fp16-vcondmn-loop-1.c" + +_Float16 a[32], b[32], c[32], fexp[32], fref[32]; +s16 sa[32], sb[32], sc[32], sexp[32], sref[32]; +u16 ua[32], ub[32], uc[32], uexp[32], uref[32]; + +#define EMULATE_CONDMOV_LOOP(size, type, ptype, op, name) \ +void \ +__attribute__ ((noinline, noclone)) \ +scalar_cond_##size##ptype##type##name ( \ + ptype * restrict a, ptype * restrict b, \ + type * restrict c, type * restrict d) \ +{ \ + int i; \ + for (i = 0; i < size; i++) \ + { \ + if (a[i] op b[i]) \ + d[i] = c[i]; \ + } \ +} + +EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, <, lt) +EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, >, gt) +EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, ==, eq) +EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, <, lt) +EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, >, gt) +EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, ==, eq) +EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, <, lt) +EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, >, gt) +EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, ==, eq) +EMULATE_CONDMOV_LOOP (32, _Float16, s16, <, lt) +EMULATE_CONDMOV_LOOP (32, _Float16, s16, >, gt) +EMULATE_CONDMOV_LOOP (32, _Float16, s16, ==, eq) +EMULATE_CONDMOV_LOOP (16, _Float16, s16, <, lt) +EMULATE_CONDMOV_LOOP (16, _Float16, s16, >, gt) +EMULATE_CONDMOV_LOOP (16, _Float16, s16, ==, eq) +EMULATE_CONDMOV_LOOP (8, _Float16, s16, <, lt) +EMULATE_CONDMOV_LOOP (8, _Float16, s16, >, gt) +EMULATE_CONDMOV_LOOP (8, _Float16, s16, ==, eq) +EMULATE_CONDMOV_LOOP (32, s16, _Float16, <, lt) +EMULATE_CONDMOV_LOOP (32, s16, _Float16, >, gt) +EMULATE_CONDMOV_LOOP (32, s16, _Float16, ==, eq) +EMULATE_CONDMOV_LOOP (16, s16, _Float16, <, lt) +EMULATE_CONDMOV_LOOP (16, s16, _Float16, >, gt) +EMULATE_CONDMOV_LOOP (16, s16, _Float16, ==, eq) +EMULATE_CONDMOV_LOOP (8, s16, _Float16, <, lt) +EMULATE_CONDMOV_LOOP (8, s16, 
_Float16, >, gt) +EMULATE_CONDMOV_LOOP (8, s16, _Float16, ==, eq) +EMULATE_CONDMOV_LOOP (32, _Float16, u16, <, lt) +EMULATE_CONDMOV_LOOP (32, _Float16, u16, >, gt) +EMULATE_CONDMOV_LOOP (32, _Float16, u16, ==, eq) +EMULATE_CONDMOV_LOOP (16, _Float16, u16, <, lt) +EMULATE_CONDMOV_LOOP (16, _Float16, u16, >, gt) +EMULATE_CONDMOV_LOOP (16, _Float16, u16, ==, eq) +EMULATE_CONDMOV_LOOP (8, _Float16, u16, <, lt) +EMULATE_CONDMOV_LOOP (8, _Float16, u16, >, gt) +EMULATE_CONDMOV_LOOP (8, _Float16, u16, ==, eq) +EMULATE_CONDMOV_LOOP (32, u16, _Float16, <, lt) +EMULATE_CONDMOV_LOOP (32, u16, _Float16, >, gt) +EMULATE_CONDMOV_LOOP (32, u16, _Float16, ==, eq) +EMULATE_CONDMOV_LOOP (16, u16, _Float16, <, lt) +EMULATE_CONDMOV_LOOP (16, u16, _Float16, >, gt) +EMULATE_CONDMOV_LOOP (16, u16, _Float16, ==, eq) +EMULATE_CONDMOV_LOOP (8, u16, _Float16, <, lt) +EMULATE_CONDMOV_LOOP (8, u16, _Float16, >, gt) +EMULATE_CONDMOV_LOOP (8, u16, _Float16, ==, eq) + +void init() +{ + int i; + for (i = 0; i < 32; i++) + { + ua[i] = sa[i] = a[i] = i; + ub[i] = sb[i] = b[i] = i; + uc[i] = sc[i] = c[i] = (32 - i) * 2; + uexp[i] = sexp[i] = fexp[i] = -1; + uref[i] = sref[i] = fref[i] = -1; + } +} + +int check_cond(void *a, void *b, int size) +{ + int i; + u16 *pa = (u16 *)a, *pb = (u16 *)b; + for (i = 0; i < size; i++) + if (pa[i] != pb[i]) + return 0; + return 1; +} + +#define TEST_CONDMOV_LOOP(size, name) \ +{ \ + init (); \ + scalar_cond_##size##_Float16_Float16##name (a, b, c, fexp); \ + loop_cond_##size##_Float16_Float16##name (a, b, c, fref); \ + if (!check_cond ((void *)fexp, (void *)fref, size)) \ + abort(); \ + \ + init (); \ + scalar_cond_##size##_Float16s16##name (a, b, sc, sexp); \ + loop_cond_##size##_Float16s16##name (a, b, sc, sref); \ + if (!check_cond ((void *)sexp, (void *)sref, size)) \ + abort(); \ + \ + init (); \ + scalar_cond_##size##s16_Float16##name (sa, sb, c, fexp); \ + loop_cond_##size##s16_Float16##name (sa, sb, c, fref); \ + if (!check_cond ((void *)fexp, (void *)fref, 
size)) \ + abort(); \ + \ + init (); \ + scalar_cond_##size##_Float16u16##name (a, b, uc, uexp); \ + loop_cond_##size##_Float16u16##name (a, b, uc, uref); \ + if (!check_cond ((void *)uexp, (void *)uref, size)) \ + abort(); \ + \ + init (); \ + scalar_cond_##size##u16_Float16##name (ua, ub, c, fexp); \ + loop_cond_##size##u16_Float16##name (ua, ub, c, fref); \ + if (!check_cond ((void *)fexp, (void *)fref, size)) \ + abort(); \ +} + +static void condmov_test() +{ + TEST_CONDMOV_LOOP (32, lt) + TEST_CONDMOV_LOOP (32, gt) + TEST_CONDMOV_LOOP (32, eq) + TEST_CONDMOV_LOOP (16, lt) + TEST_CONDMOV_LOOP (16, gt) + TEST_CONDMOV_LOOP (16, eq) + TEST_CONDMOV_LOOP (8, lt) + TEST_CONDMOV_LOOP (8, gt) + TEST_CONDMOV_LOOP (8, eq) +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c new file mode 100644 index 00000000000..ef9f85373f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ + +/* { dg-final { scan-assembler-times "vcmpph" 15 } } */ + +typedef _Float16 v8hf __attribute__ ((vector_size (16))); +typedef _Float16 v16hf __attribute__ ((vector_size (32))); +typedef _Float16 v32hf __attribute__ ((vector_size (64))); + +#define VCMPMN(type, op, name) \ +type \ +__attribute__ ((noinline, noclone)) \ +vec_cmp_##type##type##name (type a, type b) \ +{ \ + return a op b; \ +} + +VCMPMN (v8hf, <, lt) +VCMPMN (v16hf, <, lt) +VCMPMN (v32hf, <, lt) +VCMPMN (v8hf, <=, le) +VCMPMN (v16hf, <=, le) +VCMPMN (v32hf, <=, le) +VCMPMN (v8hf, >, gt) +VCMPMN (v16hf, >, gt) +VCMPMN (v32hf, >, gt) +VCMPMN (v8hf, >=, ge) +VCMPMN (v16hf, >=, ge) +VCMPMN (v32hf, >=, ge) +VCMPMN (v8hf, ==, eq) +VCMPMN (v16hf, ==, eq) +VCMPMN (v32hf, ==, eq)

[7/7] AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes.

Commit Message

Patch