Message ID | 20211014023915.78690-1-hongyu.wang@intel.com |
---|---|
State | New |
Headers | show |
Series | AVX512FP16: Support vector shuffle builtins | expand |
On Thu, Oct 14, 2021 at 10:39 AM Hongyu Wang via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi, > > This patch supports HFmode vector shuffle by creating HImode subreg when > expanding permutation expr. > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} and sde{-m32,} > OK for master? > > gcc/ChangeLog: > > * config/i386/i386-expand.c (ix86_expand_vec_perm): Convert > HFmode input operand to HImode. > (ix86_vectorize_vec_perm_const): Likewise. > (ix86_expand_vector_init): Allow HFmode for one_operand_shuffle. > * config/i386/sse.md (*avx512bw_permvar_truncv16siv16hi_1_hf): > New define_insn. > (*avx512f_permvar_truncv8siv8hi_1_hf): > Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512fp16-builtin_shuffle-1.c: New test. > * gcc.target/i386/avx512fp16-pr101846.c: Ditto. > * gcc.target/i386/avx512fp16-pr94680.c: Ditto. > --- > gcc/config/i386/i386-expand.c | 29 ++++++- > gcc/config/i386/sse.md | 54 +++++++++++- > .../i386/avx512fp16-builtin_shuffle-1.c | 86 +++++++++++++++++++ > .../gcc.target/i386/avx512fp16-pr101846.c | 56 ++++++++++++ > .../gcc.target/i386/avx512fp16-pr94680.c | 61 +++++++++++++ > 5 files changed, 284 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index c0924a59efb..0f50ed3b9f8 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -4836,6 +4836,18 @@ ix86_expand_vec_perm (rtx operands[]) > e = GET_MODE_UNIT_SIZE (mode); > gcc_assert (w <= 64); > > + if (GET_MODE_INNER (mode) == HFmode) > + { > + machine_mode orig_mode = mode; > + mode = mode_for_vector (HImode, w).require (); > + if (target) > + target = lowpart_subreg (mode, target, orig_mode); > + if (op0) > + op0 = lowpart_subreg (mode, op0, 
orig_mode); > + if (op1) > + op1 = lowpart_subreg (mode, op1, orig_mode); > + } > + > if (TARGET_AVX512F && one_operand_shuffle) > { > rtx (*gen) (rtx, rtx, rtx) = NULL; > @@ -15092,7 +15104,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) > rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; > if (inner_mode == QImode > || inner_mode == HImode > - || inner_mode == TImode) > + || inner_mode == TImode > + || inner_mode == HFmode) This part seems not related to vector shuffle. > { > unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); > scalar_mode elt_mode = inner_mode == TImode ? DImode : SImode; > @@ -21099,6 +21112,20 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, > unsigned int i, nelt, which; > bool two_args; > > + /* For HF mode vector, convert it to HI using subreg. */ > + if (GET_MODE_INNER (vmode) == HFmode) > + { > + machine_mode orig_mode = vmode; > + vmode = mode_for_vector (HImode, > + GET_MODE_NUNITS (vmode)).require (); > + if (target) > + target = lowpart_subreg (vmode, target, orig_mode); > + if (op0) > + op0 = lowpart_subreg (vmode, op0, orig_mode); > + if (op1) > + op1 = lowpart_subreg (vmode, op1, orig_mode); > + } > + > d.target = target; > d.op0 = op0; > d.op1 = op1; > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index a3c4a3f1e62..d023d8a1c2e 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -12573,6 +12573,33 @@ > (truncate:V16HI (match_dup 1)))] > "operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode);") > > +(define_insn_and_split "*avx512bw_permvar_truncv16siv16hi_1_hf" > + [(set (match_operand:V16HF 0 "nonimmediate_operand") > + (vec_select:V16HF > + (subreg:V32HF > + (unspec:V32HI > + [(match_operand:V32HI 1 "register_operand") > + (match_operand:V32HI 2 "permvar_truncate_operand")] > + UNSPEC_VPERMVAR) 0) > + (parallel [(const_int 0) (const_int 1) > + (const_int 2) (const_int 3) > + (const_int 4) (const_int 5) > + (const_int 
6) (const_int 7) > + (const_int 8) (const_int 9) > + (const_int 10) (const_int 11) > + (const_int 12) (const_int 13) > + (const_int 14) (const_int 15)])))] > + "TARGET_AVX512BW && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (truncate:V16HI (match_dup 1)))] > +{ > + operands[0] = lowpart_subreg (V16HImode, operands[0], V16HFmode); > + operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode); > +}) > + > + > (define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1" > [(set (match_operand:V8HI 0 "nonimmediate_operand") > (vec_select:V8HI > @@ -12591,6 +12618,28 @@ > (truncate:V8HI (match_dup 1)))] > "operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode);") > > +(define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1_hf" > + [(set (match_operand:V8HF 0 "nonimmediate_operand") > + (vec_select:V8HF > + (subreg:V16HF > + (unspec:V16HI > + [(match_operand:V16HI 1 "register_operand") > + (match_operand:V16HI 2 "permvar_truncate_operand")] > + UNSPEC_VPERMVAR) 0) > + (parallel [(const_int 0) (const_int 1) > + (const_int 2) (const_int 3) > + (const_int 4) (const_int 5) > + (const_int 6) (const_int 7)])))] > + "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (truncate:V8HI (match_dup 1)))] > +{ > + operands[0] = lowpart_subreg (V8HImode, operands[0], V8HFmode); > + operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode); > +}) > + > (define_insn_and_split "*avx512f_vpermvar_truncv8div8si_1" > [(set (match_operand:V8SI 0 "nonimmediate_operand") > (vec_select:V8SI > @@ -15603,12 +15652,15 @@ > > (define_mode_iterator VEC_PERM_AVX2 > [V16QI V8HI V4SI V2DI V4SF V2DF > + (V8HF "TARGET_AVX512FP16") > (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") > (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") > (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2") > + (V16HF "TARGET_AVX512FP16") > (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") > (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") > - 
(V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")]) > + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI") > + (V32HF "TARGET_AVX512FP16")]) > > (define_expand "vec_perm<mode>" > [(match_operand:VEC_PERM_AVX2 0 "register_operand") > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > new file mode 100644 > index 00000000000..89d3567a66b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > @@ -0,0 +1,86 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > +/* { dg-final { scan-assembler-not "movw" } } */ > +/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */ > +/* { dg-final { scan-assembler-times "vpermw" 6 } } */ > +/* { dg-final { scan-assembler-times "vpshufb" 3 } } */ > +/* { dg-final { scan-assembler-times "vpermt2w" 6 } } */ > + > +typedef _Float16 v32hf __attribute__((vector_size (64))); > +typedef _Float16 v16hf __attribute__((vector_size (32))); > +typedef _Float16 v8hf __attribute__((vector_size (16))); > +typedef short v32hi __attribute__((vector_size (64))); > +typedef short v16hi __attribute__((vector_size (32))); > +typedef short v8hi __attribute__((vector_size (16))); > + > +#define PERM_CONST_RANDOM_v32hi \ > +{ 0, 21, 15, 9, 43, 25, 37, 48, \ > + 8, 16, 27, 51, 30, 12, 6, 46, \ > + 34, 3, 11, 5, 17, 53, 26, 39, \ > + 2, 18, 40, 61, 19, 4, 50, 29 } > + > +#define PERM_CONST_RANDOM_RANGE32_v32hi \ > +{ 0, 21, 10, 23, 8, 18, 7, 19, \ > + 4, 25, 3, 31, 5, 22, 11, 17, \ > + 9, 20, 2, 24, 1, 30, 12, 27, \ > + 13, 28, 6, 29, 14, 16, 15, 23 } > + > +#define PERM_CONST_RANDOM_v16hi \ > +{ 0, 21, 15, 9, 13, 25, 30, 18, \ > + 8, 16, 17, 11, 4, 22, 6, 7 } > + > +#define PERM_CONST_RANDOM_RANGE16_v16hi \ > +{ 0, 9, 1, 12, 4, 15, 7, 13, \ > + 3, 10, 6, 14, 5, 8, 2, 11 } > + > +#define PERM_CONST_RANDOM_v8hi \ > +{ 0, 14, 15, 9, 13, 2, 3, 5 } > + > +#define PERM_CONST_RANDOM_RANGE8_v8hi \ > +{ 0, 7, 
2, 5, 3, 4, 1, 6 } > + > +#define PERM_CONST_RANDOM(size) \ > + PERM_CONST_RANDOM_v##size##hi > + > +#define PERM_CONST_RANDOM_RANGE(size) \ > + PERM_CONST_RANDOM_RANGE##size##_v##size##hi > + > +#define SHUFFLE_CONST_RANDOM(type, itype, size) \ > +type foo_##type##shuffle_2param_const_random (type a, type b) \ > +{ \ > + return __builtin_shuffle (a, b, \ > + (itype) PERM_CONST_RANDOM (size)); \ > +} \ > +type foo_##type##shuffle_2param_const_random_range (type a, type b) \ > +{ \ > + return __builtin_shuffle (a, b, \ > + (itype) PERM_CONST_RANDOM_RANGE (size)); \ > +} \ > +type foo_##type##shuffle_1param_const_random (type a) \ > +{ \ > + return __builtin_shuffle (a, \ > + (itype) PERM_CONST_RANDOM (size)); \ > +} \ > +type foo_##type##shuffle_1param_const_random_range (type a) \ > +{ \ > + return __builtin_shuffle (a, \ > + (itype) PERM_CONST_RANDOM_RANGE (size)); \ > +} > + > +#define SHUFFLE_VEC_INDEX(type, itype) \ > +type foo##type##itype##shuffle_2param_vec (type a, type b, itype c) \ > +{ \ > + return __builtin_shuffle (a, b, c); \ > +} \ > +type foo##type##itype##shuffle_1param_vec (type a, itype c) \ > +{ \ > + return __builtin_shuffle (a, c); \ > +} > + > +SHUFFLE_CONST_RANDOM (v32hf, v32hi, 32) > +SHUFFLE_CONST_RANDOM (v16hf, v16hi, 16) > +SHUFFLE_CONST_RANDOM (v8hf, v8hi, 8) > + > +SHUFFLE_VEC_INDEX (v32hf, v32hi) > +SHUFFLE_VEC_INDEX (v16hf, v16hi) > +SHUFFLE_VEC_INDEX (v8hf, v8hi) > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > new file mode 100644 > index 00000000000..abd91561785 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > @@ -0,0 +1,56 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > +/* { dg-final { scan-assembler-times "vpmovzxwd" "3" } } */ > +/* { dg-final { scan-assembler-times "vpmovdw" "3" } } */ > + > +typedef _Float16 v32hf __attribute__((vector_size (64))); > +typedef _Float16 v16hf 
__attribute__((vector_size (32))); > +typedef _Float16 v8hf __attribute__((vector_size (16))); > +typedef _Float16 v4hf __attribute__((vector_size (8))); > +typedef short v4hi __attribute__((vector_size (8))); > +typedef short v8hi __attribute__((vector_size (16))); > + > +#define PERM_CONST_INTERLEAVE_v32hi \ > +0, 16, 1, 17, 2, 18, 3, 19, \ > +4, 20, 5, 21, 6, 22, 7, 23, \ > +8, 24, 9, 25, 10, 26, 11, 27, \ > +12, 28, 13, 29, 14, 30, 15, 31 > + > +#define PERM_CONST_INTERLEAVE_v16hi \ > +0, 8, 1, 9, 2, 10, 3, 11, \ > +4, 12, 5, 13, 6, 14, 7, 15 > + > +#define PERM_CONST_INTERLEAVE_v8hi \ > +0, 4, 1, 5, 2, 6, 3, 7 > + > +#define PERM_CONST_TRUNCATE_v32hi \ > +0, 2, 4, 6, 8, 10, 12, 14, \ > +16, 18, 20, 22, 24, 26, 28, 30 > + > +#define PERM_CONST_TRUNCATE_v16hi \ > +0, 2, 4, 6, 8, 10, 12, 14 > + > +#define PERM_CONST_TRUNCATE_v8hi \ > +0, 2, 4, 6 > + > +#define PERM_CONST_INTERLEAVE(size) \ > + PERM_CONST_INTERLEAVE_v##size##hi > + > +#define PERM_CONST_TRUNCATE(size) \ > + PERM_CONST_TRUNCATE_v##size##hi > + > +#define SHUFFLE_CONST_INTERLEAVE(type, rtype, size) \ > +rtype foo_##type##shufflevector_const_interleave (type a) \ > +{ \ > + return __builtin_shufflevector (a, (type) {}, \ > + PERM_CONST_INTERLEAVE (size)); \ > +} \ > +type foo_##type##shufflevector_const_trunc (rtype a) \ > +{ \ > + return __builtin_shufflevector (a, a, \ > + PERM_CONST_TRUNCATE (size)); \ > +} > + > +SHUFFLE_CONST_INTERLEAVE (v16hf, v32hf, 32) > +SHUFFLE_CONST_INTERLEAVE (v8hf, v16hf, 16) > +SHUFFLE_CONST_INTERLEAVE (v4hf, v8hf, 8) > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > new file mode 100644 > index 00000000000..bfe11236eef > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > @@ -0,0 +1,61 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > +/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */ > +/* { dg-final { scan-assembler-times "vmovq" 2 } 
} */ > + > +typedef _Float16 v32hf __attribute__((vector_size (64))); > +typedef _Float16 v16hf __attribute__((vector_size (32))); > +typedef _Float16 v8hf __attribute__((vector_size (16))); > +typedef short v32hi __attribute__((vector_size (64))); > +typedef short v16hi __attribute__((vector_size (32))); > +typedef short v8hi __attribute__((vector_size (16))); > + > + > +#define PERM_CONST_CONCAT0_v32hi \ > +{ 0, 1, 2, 3, 4, 5, 6, 7, \ > + 8, 9, 10, 11, 12, 13, 14, 15, \ > + 34, 53, 41, 55, 57, 43, 36, 39, \ > + 62, 48, 50, 51, 49, 44, 60, 37 } > + > +#define PERM_CONST_CONCAT0_v32hi_l \ > +{ 32, 33, 34, 35, 36, 37, 38, 39, \ > + 40, 41, 42, 43, 44, 45, 46, 47, \ > + 31, 0, 29, 2, 27, 4, 25, 6, 23, \ > + 8, 21, 10, 19, 12, 17, 14 } > + > +#define PERM_CONST_CONCAT0_v16hi \ > +{ 0, 1, 2, 3, 4, 5, 6, 7, \ > + 21, 26, 17, 31, 24, 22, 30, 19 } > + > +#define PERM_CONST_CONCAT0_v16hi_l \ > +{ 16, 17, 18, 19, 20, 21, 22, 23, \ > + 15, 0, 13, 2, 11, 4, 9, 6 } > + > +#define PERM_CONST_CONCAT0_v8hi \ > +{ 0, 1, 2, 3, 9, 11, 14, 12 } > + > +#define PERM_CONST_CONCAT0_v8hi_l \ > +{ 8, 9, 10, 11, 3, 5, 1, 7 } > + > +#define PERM_CONST_CONCAT0(type) \ > + PERM_CONST_CONCAT0_##type > + > +#define PERM_CONST_CONCAT0_L(type) \ > + PERM_CONST_CONCAT0_##type##_l > + > +#define SHUFFLE_CONST_CONCAT0(type, itype) \ > +type foo_##type##shuffle_const_concat0 (type a) \ > +{ \ > + return __builtin_shuffle (a, (type) {0}, \ > + (itype) PERM_CONST_CONCAT0 (itype)); \ > +} \ > +type foo_##type##shuffle_const_concat0_l (type a) \ > +{ \ > + return __builtin_shuffle ((type) {0}, a, \ > + (itype) PERM_CONST_CONCAT0_L (itype)); \ > +} > + > +SHUFFLE_CONST_CONCAT0 (v32hf, v32hi) > +SHUFFLE_CONST_CONCAT0 (v16hf, v16hi) > +SHUFFLE_CONST_CONCAT0 (v8hf, v8hi) > + > -- > 2.18.1 >
> This part seems not related to vector shuffle. Yes, have separated this part to another patch and checked-in. Updated patch. Ok for this one? Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> 于2021年10月14日周四 下午2:33写道: > > On Thu, Oct 14, 2021 at 10:39 AM Hongyu Wang via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Hi, > > > > This patch supports HFmode vector shuffle by creating HImode subreg when > > expanding permutation expr. > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} and sde{-m32,} > > OK for master? > > > > gcc/ChangeLog: > > > > * config/i386/i386-expand.c (ix86_expand_vec_perm): Convert > > HFmode input operand to HImode. > > (ix86_vectorize_vec_perm_const): Likewise. > > (ix86_expand_vector_init): Allow HFmode for one_operand_shuffle. > > * config/i386/sse.md (*avx512bw_permvar_truncv16siv16hi_1_hf): > > New define_insn. > > (*avx512f_permvar_truncv8siv8hi_1_hf): > > Likewise. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/avx512fp16-builtin_shuffle-1.c: New test. > > * gcc.target/i386/avx512fp16-pr101846.c: Ditto. > > * gcc.target/i386/avx512fp16-pr94680.c: Ditto. 
> > --- > > gcc/config/i386/i386-expand.c | 29 ++++++- > > gcc/config/i386/sse.md | 54 +++++++++++- > > .../i386/avx512fp16-builtin_shuffle-1.c | 86 +++++++++++++++++++ > > .../gcc.target/i386/avx512fp16-pr101846.c | 56 ++++++++++++ > > .../gcc.target/i386/avx512fp16-pr94680.c | 61 +++++++++++++ > > 5 files changed, 284 insertions(+), 2 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > > index c0924a59efb..0f50ed3b9f8 100644 > > --- a/gcc/config/i386/i386-expand.c > > +++ b/gcc/config/i386/i386-expand.c > > @@ -4836,6 +4836,18 @@ ix86_expand_vec_perm (rtx operands[]) > > e = GET_MODE_UNIT_SIZE (mode); > > gcc_assert (w <= 64); > > > > + if (GET_MODE_INNER (mode) == HFmode) > > + { > > + machine_mode orig_mode = mode; > > + mode = mode_for_vector (HImode, w).require (); > > + if (target) > > + target = lowpart_subreg (mode, target, orig_mode); > > + if (op0) > > + op0 = lowpart_subreg (mode, op0, orig_mode); > > + if (op1) > > + op1 = lowpart_subreg (mode, op1, orig_mode); > > + } > > + > > if (TARGET_AVX512F && one_operand_shuffle) > > { > > rtx (*gen) (rtx, rtx, rtx) = NULL; > > @@ -15092,7 +15104,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) > > rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; > > if (inner_mode == QImode > > || inner_mode == HImode > > - || inner_mode == TImode) > > + || inner_mode == TImode > > + || inner_mode == HFmode) > This part seems not related to vector shuffle. > > { > > unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); > > scalar_mode elt_mode = inner_mode == TImode ? 
DImode : SImode; > > @@ -21099,6 +21112,20 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, > > unsigned int i, nelt, which; > > bool two_args; > > > > + /* For HF mode vector, convert it to HI using subreg. */ > > + if (GET_MODE_INNER (vmode) == HFmode) > > + { > > + machine_mode orig_mode = vmode; > > + vmode = mode_for_vector (HImode, > > + GET_MODE_NUNITS (vmode)).require (); > > + if (target) > > + target = lowpart_subreg (vmode, target, orig_mode); > > + if (op0) > > + op0 = lowpart_subreg (vmode, op0, orig_mode); > > + if (op1) > > + op1 = lowpart_subreg (vmode, op1, orig_mode); > > + } > > + > > d.target = target; > > d.op0 = op0; > > d.op1 = op1; > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > index a3c4a3f1e62..d023d8a1c2e 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -12573,6 +12573,33 @@ > > (truncate:V16HI (match_dup 1)))] > > "operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode);") > > > > +(define_insn_and_split "*avx512bw_permvar_truncv16siv16hi_1_hf" > > + [(set (match_operand:V16HF 0 "nonimmediate_operand") > > + (vec_select:V16HF > > + (subreg:V32HF > > + (unspec:V32HI > > + [(match_operand:V32HI 1 "register_operand") > > + (match_operand:V32HI 2 "permvar_truncate_operand")] > > + UNSPEC_VPERMVAR) 0) > > + (parallel [(const_int 0) (const_int 1) > > + (const_int 2) (const_int 3) > > + (const_int 4) (const_int 5) > > + (const_int 6) (const_int 7) > > + (const_int 8) (const_int 9) > > + (const_int 10) (const_int 11) > > + (const_int 12) (const_int 13) > > + (const_int 14) (const_int 15)])))] > > + "TARGET_AVX512BW && ix86_pre_reload_split ()" > > + "#" > > + "&& 1" > > + [(set (match_dup 0) > > + (truncate:V16HI (match_dup 1)))] > > +{ > > + operands[0] = lowpart_subreg (V16HImode, operands[0], V16HFmode); > > + operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode); > > +}) > > + > > + > > (define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1" 
> > [(set (match_operand:V8HI 0 "nonimmediate_operand") > > (vec_select:V8HI > > @@ -12591,6 +12618,28 @@ > > (truncate:V8HI (match_dup 1)))] > > "operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode);") > > > > +(define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1_hf" > > + [(set (match_operand:V8HF 0 "nonimmediate_operand") > > + (vec_select:V8HF > > + (subreg:V16HF > > + (unspec:V16HI > > + [(match_operand:V16HI 1 "register_operand") > > + (match_operand:V16HI 2 "permvar_truncate_operand")] > > + UNSPEC_VPERMVAR) 0) > > + (parallel [(const_int 0) (const_int 1) > > + (const_int 2) (const_int 3) > > + (const_int 4) (const_int 5) > > + (const_int 6) (const_int 7)])))] > > + "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()" > > + "#" > > + "&& 1" > > + [(set (match_dup 0) > > + (truncate:V8HI (match_dup 1)))] > > +{ > > + operands[0] = lowpart_subreg (V8HImode, operands[0], V8HFmode); > > + operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode); > > +}) > > + > > (define_insn_and_split "*avx512f_vpermvar_truncv8div8si_1" > > [(set (match_operand:V8SI 0 "nonimmediate_operand") > > (vec_select:V8SI > > @@ -15603,12 +15652,15 @@ > > > > (define_mode_iterator VEC_PERM_AVX2 > > [V16QI V8HI V4SI V2DI V4SF V2DF > > + (V8HF "TARGET_AVX512FP16") > > (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") > > (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") > > (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2") > > + (V16HF "TARGET_AVX512FP16") > > (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") > > (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") > > - (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")]) > > + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI") > > + (V32HF "TARGET_AVX512FP16")]) > > > > (define_expand "vec_perm<mode>" > > [(match_operand:VEC_PERM_AVX2 0 "register_operand") > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > new file mode 100644 > > 
index 00000000000..89d3567a66b > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > @@ -0,0 +1,86 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > +/* { dg-final { scan-assembler-not "movw" } } */ > > +/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */ > > +/* { dg-final { scan-assembler-times "vpermw" 6 } } */ > > +/* { dg-final { scan-assembler-times "vpshufb" 3 } } */ > > +/* { dg-final { scan-assembler-times "vpermt2w" 6 } } */ > > + > > +typedef _Float16 v32hf __attribute__((vector_size (64))); > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > +typedef short v32hi __attribute__((vector_size (64))); > > +typedef short v16hi __attribute__((vector_size (32))); > > +typedef short v8hi __attribute__((vector_size (16))); > > + > > +#define PERM_CONST_RANDOM_v32hi \ > > +{ 0, 21, 15, 9, 43, 25, 37, 48, \ > > + 8, 16, 27, 51, 30, 12, 6, 46, \ > > + 34, 3, 11, 5, 17, 53, 26, 39, \ > > + 2, 18, 40, 61, 19, 4, 50, 29 } > > + > > +#define PERM_CONST_RANDOM_RANGE32_v32hi \ > > +{ 0, 21, 10, 23, 8, 18, 7, 19, \ > > + 4, 25, 3, 31, 5, 22, 11, 17, \ > > + 9, 20, 2, 24, 1, 30, 12, 27, \ > > + 13, 28, 6, 29, 14, 16, 15, 23 } > > + > > +#define PERM_CONST_RANDOM_v16hi \ > > +{ 0, 21, 15, 9, 13, 25, 30, 18, \ > > + 8, 16, 17, 11, 4, 22, 6, 7 } > > + > > +#define PERM_CONST_RANDOM_RANGE16_v16hi \ > > +{ 0, 9, 1, 12, 4, 15, 7, 13, \ > > + 3, 10, 6, 14, 5, 8, 2, 11 } > > + > > +#define PERM_CONST_RANDOM_v8hi \ > > +{ 0, 14, 15, 9, 13, 2, 3, 5 } > > + > > +#define PERM_CONST_RANDOM_RANGE8_v8hi \ > > +{ 0, 7, 2, 5, 3, 4, 1, 6 } > > + > > +#define PERM_CONST_RANDOM(size) \ > > + PERM_CONST_RANDOM_v##size##hi > > + > > +#define PERM_CONST_RANDOM_RANGE(size) \ > > + PERM_CONST_RANDOM_RANGE##size##_v##size##hi > > + > > +#define SHUFFLE_CONST_RANDOM(type, itype, size) \ > > +type foo_##type##shuffle_2param_const_random 
(type a, type b) \ > > +{ \ > > + return __builtin_shuffle (a, b, \ > > + (itype) PERM_CONST_RANDOM (size)); \ > > +} \ > > +type foo_##type##shuffle_2param_const_random_range (type a, type b) \ > > +{ \ > > + return __builtin_shuffle (a, b, \ > > + (itype) PERM_CONST_RANDOM_RANGE (size)); \ > > +} \ > > +type foo_##type##shuffle_1param_const_random (type a) \ > > +{ \ > > + return __builtin_shuffle (a, \ > > + (itype) PERM_CONST_RANDOM (size)); \ > > +} \ > > +type foo_##type##shuffle_1param_const_random_range (type a) \ > > +{ \ > > + return __builtin_shuffle (a, \ > > + (itype) PERM_CONST_RANDOM_RANGE (size)); \ > > +} > > + > > +#define SHUFFLE_VEC_INDEX(type, itype) \ > > +type foo##type##itype##shuffle_2param_vec (type a, type b, itype c) \ > > +{ \ > > + return __builtin_shuffle (a, b, c); \ > > +} \ > > +type foo##type##itype##shuffle_1param_vec (type a, itype c) \ > > +{ \ > > + return __builtin_shuffle (a, c); \ > > +} > > + > > +SHUFFLE_CONST_RANDOM (v32hf, v32hi, 32) > > +SHUFFLE_CONST_RANDOM (v16hf, v16hi, 16) > > +SHUFFLE_CONST_RANDOM (v8hf, v8hi, 8) > > + > > +SHUFFLE_VEC_INDEX (v32hf, v32hi) > > +SHUFFLE_VEC_INDEX (v16hf, v16hi) > > +SHUFFLE_VEC_INDEX (v8hf, v8hi) > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > new file mode 100644 > > index 00000000000..abd91561785 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > @@ -0,0 +1,56 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > +/* { dg-final { scan-assembler-times "vpmovzxwd" "3" } } */ > > +/* { dg-final { scan-assembler-times "vpmovdw" "3" } } */ > > + > > +typedef _Float16 v32hf __attribute__((vector_size (64))); > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > +typedef _Float16 v4hf __attribute__((vector_size (8))); > > +typedef short v4hi 
__attribute__((vector_size (8))); > > +typedef short v8hi __attribute__((vector_size (16))); > > + > > +#define PERM_CONST_INTERLEAVE_v32hi \ > > +0, 16, 1, 17, 2, 18, 3, 19, \ > > +4, 20, 5, 21, 6, 22, 7, 23, \ > > +8, 24, 9, 25, 10, 26, 11, 27, \ > > +12, 28, 13, 29, 14, 30, 15, 31 > > + > > +#define PERM_CONST_INTERLEAVE_v16hi \ > > +0, 8, 1, 9, 2, 10, 3, 11, \ > > +4, 12, 5, 13, 6, 14, 7, 15 > > + > > +#define PERM_CONST_INTERLEAVE_v8hi \ > > +0, 4, 1, 5, 2, 6, 3, 7 > > + > > +#define PERM_CONST_TRUNCATE_v32hi \ > > +0, 2, 4, 6, 8, 10, 12, 14, \ > > +16, 18, 20, 22, 24, 26, 28, 30 > > + > > +#define PERM_CONST_TRUNCATE_v16hi \ > > +0, 2, 4, 6, 8, 10, 12, 14 > > + > > +#define PERM_CONST_TRUNCATE_v8hi \ > > +0, 2, 4, 6 > > + > > +#define PERM_CONST_INTERLEAVE(size) \ > > + PERM_CONST_INTERLEAVE_v##size##hi > > + > > +#define PERM_CONST_TRUNCATE(size) \ > > + PERM_CONST_TRUNCATE_v##size##hi > > + > > +#define SHUFFLE_CONST_INTERLEAVE(type, rtype, size) \ > > +rtype foo_##type##shufflevector_const_interleave (type a) \ > > +{ \ > > + return __builtin_shufflevector (a, (type) {}, \ > > + PERM_CONST_INTERLEAVE (size)); \ > > +} \ > > +type foo_##type##shufflevector_const_trunc (rtype a) \ > > +{ \ > > + return __builtin_shufflevector (a, a, \ > > + PERM_CONST_TRUNCATE (size)); \ > > +} > > + > > +SHUFFLE_CONST_INTERLEAVE (v16hf, v32hf, 32) > > +SHUFFLE_CONST_INTERLEAVE (v8hf, v16hf, 16) > > +SHUFFLE_CONST_INTERLEAVE (v4hf, v8hf, 8) > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > new file mode 100644 > > index 00000000000..bfe11236eef > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > @@ -0,0 +1,61 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > +/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */ > > +/* { dg-final { scan-assembler-times "vmovq" 2 } } */ > > + > > +typedef _Float16 v32hf 
__attribute__((vector_size (64))); > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > +typedef short v32hi __attribute__((vector_size (64))); > > +typedef short v16hi __attribute__((vector_size (32))); > > +typedef short v8hi __attribute__((vector_size (16))); > > + > > + > > +#define PERM_CONST_CONCAT0_v32hi \ > > +{ 0, 1, 2, 3, 4, 5, 6, 7, \ > > + 8, 9, 10, 11, 12, 13, 14, 15, \ > > + 34, 53, 41, 55, 57, 43, 36, 39, \ > > + 62, 48, 50, 51, 49, 44, 60, 37 } > > + > > +#define PERM_CONST_CONCAT0_v32hi_l \ > > +{ 32, 33, 34, 35, 36, 37, 38, 39, \ > > + 40, 41, 42, 43, 44, 45, 46, 47, \ > > + 31, 0, 29, 2, 27, 4, 25, 6, 23, \ > > + 8, 21, 10, 19, 12, 17, 14 } > > + > > +#define PERM_CONST_CONCAT0_v16hi \ > > +{ 0, 1, 2, 3, 4, 5, 6, 7, \ > > + 21, 26, 17, 31, 24, 22, 30, 19 } > > + > > +#define PERM_CONST_CONCAT0_v16hi_l \ > > +{ 16, 17, 18, 19, 20, 21, 22, 23, \ > > + 15, 0, 13, 2, 11, 4, 9, 6 } > > + > > +#define PERM_CONST_CONCAT0_v8hi \ > > +{ 0, 1, 2, 3, 9, 11, 14, 12 } > > + > > +#define PERM_CONST_CONCAT0_v8hi_l \ > > +{ 8, 9, 10, 11, 3, 5, 1, 7 } > > + > > +#define PERM_CONST_CONCAT0(type) \ > > + PERM_CONST_CONCAT0_##type > > + > > +#define PERM_CONST_CONCAT0_L(type) \ > > + PERM_CONST_CONCAT0_##type##_l > > + > > +#define SHUFFLE_CONST_CONCAT0(type, itype) \ > > +type foo_##type##shuffle_const_concat0 (type a) \ > > +{ \ > > + return __builtin_shuffle (a, (type) {0}, \ > > + (itype) PERM_CONST_CONCAT0 (itype)); \ > > +} \ > > +type foo_##type##shuffle_const_concat0_l (type a) \ > > +{ \ > > + return __builtin_shuffle ((type) {0}, a, \ > > + (itype) PERM_CONST_CONCAT0_L (itype)); \ > > +} > > + > > +SHUFFLE_CONST_CONCAT0 (v32hf, v32hi) > > +SHUFFLE_CONST_CONCAT0 (v16hf, v16hi) > > +SHUFFLE_CONST_CONCAT0 (v8hf, v8hi) > > + > > -- > > 2.18.1 > > > > > -- > BR, > Hongtao
On Fri, Oct 15, 2021 at 1:37 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote: > > > This part seems not related to vector shuffle. > Yes, have separated this part to another patch and checked-in. > > Updated patch. Ok for this one? > > Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> 于2021年10月14日周四 下午2:33写道: > > > > On Thu, Oct 14, 2021 at 10:39 AM Hongyu Wang via Gcc-patches > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > Hi, > > > > > > This patch supports HFmode vector shuffle by creating HImode subreg when > > > expanding permutation expr. > > > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} and sde{-m32,} > > > OK for master? > > > > > > gcc/ChangeLog: > > > > > > * config/i386/i386-expand.c (ix86_expand_vec_perm): Convert > > > HFmode input operand to HImode. > > > (ix86_vectorize_vec_perm_const): Likewise. > > > (ix86_expand_vector_init): Allow HFmode for one_operand_shuffle. > > > * config/i386/sse.md (*avx512bw_permvar_truncv16siv16hi_1_hf): > > > New define_insn. > > > (*avx512f_permvar_truncv8siv8hi_1_hf): > > > Likewise. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * gcc.target/i386/avx512fp16-builtin_shuffle-1.c: New test. > > > * gcc.target/i386/avx512fp16-pr101846.c: Ditto. > > > * gcc.target/i386/avx512fp16-pr94680.c: Ditto. 
> > > --- > > > gcc/config/i386/i386-expand.c | 29 ++++++- > > > gcc/config/i386/sse.md | 54 +++++++++++- > > > .../i386/avx512fp16-builtin_shuffle-1.c | 86 +++++++++++++++++++ > > > .../gcc.target/i386/avx512fp16-pr101846.c | 56 ++++++++++++ > > > .../gcc.target/i386/avx512fp16-pr94680.c | 61 +++++++++++++ > > > 5 files changed, 284 insertions(+), 2 deletions(-) > > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > > > > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > > > index c0924a59efb..0f50ed3b9f8 100644 > > > --- a/gcc/config/i386/i386-expand.c > > > +++ b/gcc/config/i386/i386-expand.c > > > @@ -4836,6 +4836,18 @@ ix86_expand_vec_perm (rtx operands[]) > > > e = GET_MODE_UNIT_SIZE (mode); > > > gcc_assert (w <= 64); > > > > > > + if (GET_MODE_INNER (mode) == HFmode) > > > + { > > > + machine_mode orig_mode = mode; > > > + mode = mode_for_vector (HImode, w).require (); > > > + if (target) > > > + target = lowpart_subreg (mode, target, orig_mode); > > > + if (op0) > > > + op0 = lowpart_subreg (mode, op0, orig_mode); > > > + if (op1) > > > + op1 = lowpart_subreg (mode, op1, orig_mode); > > > + } > > > + ix86_expand_vec_perm is only called by (define_expand "vec_perm<mode>"), which means target, op0 and op1 must exist, so you can drop the if (target/op0/op1) checks. > > > if (TARGET_AVX512F && one_operand_shuffle) > > > { > > > rtx (*gen) (rtx, rtx, rtx) = NULL; > > > @@ -15092,7 +15104,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) > > > rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; > > > if (inner_mode == QImode > > > || inner_mode == HImode > > > - || inner_mode == TImode) > > > + || inner_mode == TImode > > > + || inner_mode == HFmode) > > This part seems not related to vector shuffle. 
> > > { > > > unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); > > > scalar_mode elt_mode = inner_mode == TImode ? DImode : SImode; > > > @@ -21099,6 +21112,20 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, > > > unsigned int i, nelt, which; > > > bool two_args; > > > > > > + /* For HF mode vector, convert it to HI using subreg. */ > > > + if (GET_MODE_INNER (vmode) == HFmode) > > > + { > > > + machine_mode orig_mode = vmode; > > > + vmode = mode_for_vector (HImode, > > > + GET_MODE_NUNITS (vmode)).require (); > > > + if (target) > > > + target = lowpart_subreg (vmode, target, orig_mode); > > > + if (op0) > > > + op0 = lowpart_subreg (vmode, op0, orig_mode); > > > + if (op1) > > > + op1 = lowpart_subreg (vmode, op1, orig_mode); > > > + } > > > + Those checks for NULL seem reasonable according to the documentation: op0, op1 and target may be NULL. @deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST (machine_mode @var{mode}, rtx @var{output}, rtx @var{in0}, rtx @var{in1}, const vec_perm_indices @var{&sel}) This hook is used to test whether the target can permute up to two vectors of mode @var{mode} using the permutation vector @code{sel}, and also to emit such a permutation. In the former case @var{in0}, @var{in1} and @var{out} are all null. In the latter case @var{in0} and @var{in1} are the source vectors and @var{out} is the destination vector; all three are operands of mode @var{mode}. @var{in1} is the same as @var{in0} if @var{sel} describes a permutation on one vector instead of two. 
> > > d.target = target; > > > d.op0 = op0; > > > d.op1 = op1; > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > > index a3c4a3f1e62..d023d8a1c2e 100644 > > > --- a/gcc/config/i386/sse.md > > > +++ b/gcc/config/i386/sse.md > > > @@ -12573,6 +12573,33 @@ > > > (truncate:V16HI (match_dup 1)))] > > > "operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode);") > > > > > > +(define_insn_and_split "*avx512bw_permvar_truncv16siv16hi_1_hf" > > > + [(set (match_operand:V16HF 0 "nonimmediate_operand") > > > + (vec_select:V16HF > > > + (subreg:V32HF > > > + (unspec:V32HI > > > + [(match_operand:V32HI 1 "register_operand") > > > + (match_operand:V32HI 2 "permvar_truncate_operand")] > > > + UNSPEC_VPERMVAR) 0) > > > + (parallel [(const_int 0) (const_int 1) > > > + (const_int 2) (const_int 3) > > > + (const_int 4) (const_int 5) > > > + (const_int 6) (const_int 7) > > > + (const_int 8) (const_int 9) > > > + (const_int 10) (const_int 11) > > > + (const_int 12) (const_int 13) > > > + (const_int 14) (const_int 15)])))] > > > + "TARGET_AVX512BW && ix86_pre_reload_split ()" > > > + "#" > > > + "&& 1" > > > + [(set (match_dup 0) > > > + (truncate:V16HI (match_dup 1)))] > > > +{ > > > + operands[0] = lowpart_subreg (V16HImode, operands[0], V16HFmode); > > > + operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode); > > > +}) > > > + > > > + > > > (define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1" > > > [(set (match_operand:V8HI 0 "nonimmediate_operand") > > > (vec_select:V8HI > > > @@ -12591,6 +12618,28 @@ > > > (truncate:V8HI (match_dup 1)))] > > > "operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode);") > > > > > > +(define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1_hf" > > > + [(set (match_operand:V8HF 0 "nonimmediate_operand") > > > + (vec_select:V8HF > > > + (subreg:V16HF > > > + (unspec:V16HI > > > + [(match_operand:V16HI 1 "register_operand") > > > + (match_operand:V16HI 2 "permvar_truncate_operand")] > > > + 
UNSPEC_VPERMVAR) 0) > > > + (parallel [(const_int 0) (const_int 1) > > > + (const_int 2) (const_int 3) > > > + (const_int 4) (const_int 5) > > > + (const_int 6) (const_int 7)])))] > > > + "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()" > > > + "#" > > > + "&& 1" > > > + [(set (match_dup 0) > > > + (truncate:V8HI (match_dup 1)))] > > > +{ > > > + operands[0] = lowpart_subreg (V8HImode, operands[0], V8HFmode); > > > + operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode); > > > +}) > > > + > > > (define_insn_and_split "*avx512f_vpermvar_truncv8div8si_1" > > > [(set (match_operand:V8SI 0 "nonimmediate_operand") > > > (vec_select:V8SI > > > @@ -15603,12 +15652,15 @@ > > > > > > (define_mode_iterator VEC_PERM_AVX2 > > > [V16QI V8HI V4SI V2DI V4SF V2DF > > > + (V8HF "TARGET_AVX512FP16") > > > (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") > > > (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") > > > (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2") > > > + (V16HF "TARGET_AVX512FP16") > > > (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") > > > (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") > > > - (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")]) > > > + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI") > > > + (V32HF "TARGET_AVX512FP16")]) > > > > > > (define_expand "vec_perm<mode>" > > > [(match_operand:VEC_PERM_AVX2 0 "register_operand") > > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > > new file mode 100644 > > > index 00000000000..89d3567a66b > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > > @@ -0,0 +1,86 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > > +/* { dg-final { scan-assembler-not "movw" } } */ > > > +/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */ > > > +/* { dg-final { scan-assembler-times "vpermw" 6 } } */ > > > +/* { dg-final { 
scan-assembler-times "vpshufb" 3 } } */ > > > +/* { dg-final { scan-assembler-times "vpermt2w" 6 } } */ > > > + > > > +typedef _Float16 v32hf __attribute__((vector_size (64))); > > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > > +typedef short v32hi __attribute__((vector_size (64))); > > > +typedef short v16hi __attribute__((vector_size (32))); > > > +typedef short v8hi __attribute__((vector_size (16))); > > > + > > > +#define PERM_CONST_RANDOM_v32hi \ > > > +{ 0, 21, 15, 9, 43, 25, 37, 48, \ > > > + 8, 16, 27, 51, 30, 12, 6, 46, \ > > > + 34, 3, 11, 5, 17, 53, 26, 39, \ > > > + 2, 18, 40, 61, 19, 4, 50, 29 } > > > + > > > +#define PERM_CONST_RANDOM_RANGE32_v32hi \ > > > +{ 0, 21, 10, 23, 8, 18, 7, 19, \ > > > + 4, 25, 3, 31, 5, 22, 11, 17, \ > > > + 9, 20, 2, 24, 1, 30, 12, 27, \ > > > + 13, 28, 6, 29, 14, 16, 15, 23 } > > > + > > > +#define PERM_CONST_RANDOM_v16hi \ > > > +{ 0, 21, 15, 9, 13, 25, 30, 18, \ > > > + 8, 16, 17, 11, 4, 22, 6, 7 } > > > + > > > +#define PERM_CONST_RANDOM_RANGE16_v16hi \ > > > +{ 0, 9, 1, 12, 4, 15, 7, 13, \ > > > + 3, 10, 6, 14, 5, 8, 2, 11 } > > > + > > > +#define PERM_CONST_RANDOM_v8hi \ > > > +{ 0, 14, 15, 9, 13, 2, 3, 5 } > > > + > > > +#define PERM_CONST_RANDOM_RANGE8_v8hi \ > > > +{ 0, 7, 2, 5, 3, 4, 1, 6 } > > > + > > > +#define PERM_CONST_RANDOM(size) \ > > > + PERM_CONST_RANDOM_v##size##hi > > > + > > > +#define PERM_CONST_RANDOM_RANGE(size) \ > > > + PERM_CONST_RANDOM_RANGE##size##_v##size##hi > > > + > > > +#define SHUFFLE_CONST_RANDOM(type, itype, size) \ > > > +type foo_##type##shuffle_2param_const_random (type a, type b) \ > > > +{ \ > > > + return __builtin_shuffle (a, b, \ > > > + (itype) PERM_CONST_RANDOM (size)); \ > > > +} \ > > > +type foo_##type##shuffle_2param_const_random_range (type a, type b) \ > > > +{ \ > > > + return __builtin_shuffle (a, b, \ > > > + (itype) PERM_CONST_RANDOM_RANGE (size)); \ > > > +} \ > > > +type 
foo_##type##shuffle_1param_const_random (type a) \ > > > +{ \ > > > + return __builtin_shuffle (a, \ > > > + (itype) PERM_CONST_RANDOM (size)); \ > > > +} \ > > > +type foo_##type##shuffle_1param_const_random_range (type a) \ > > > +{ \ > > > + return __builtin_shuffle (a, \ > > > + (itype) PERM_CONST_RANDOM_RANGE (size)); \ > > > +} > > > + > > > +#define SHUFFLE_VEC_INDEX(type, itype) \ > > > +type foo##type##itype##shuffle_2param_vec (type a, type b, itype c) \ > > > +{ \ > > > + return __builtin_shuffle (a, b, c); \ > > > +} \ > > > +type foo##type##itype##shuffle_1param_vec (type a, itype c) \ > > > +{ \ > > > + return __builtin_shuffle (a, c); \ > > > +} > > > + > > > +SHUFFLE_CONST_RANDOM (v32hf, v32hi, 32) > > > +SHUFFLE_CONST_RANDOM (v16hf, v16hi, 16) > > > +SHUFFLE_CONST_RANDOM (v8hf, v8hi, 8) > > > + > > > +SHUFFLE_VEC_INDEX (v32hf, v32hi) > > > +SHUFFLE_VEC_INDEX (v16hf, v16hi) > > > +SHUFFLE_VEC_INDEX (v8hf, v8hi) > > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > > new file mode 100644 > > > index 00000000000..abd91561785 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > > @@ -0,0 +1,56 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > > +/* { dg-final { scan-assembler-times "vpmovzxwd" "3" } } */ > > > +/* { dg-final { scan-assembler-times "vpmovdw" "3" } } */ > > > + > > > +typedef _Float16 v32hf __attribute__((vector_size (64))); > > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > > +typedef _Float16 v4hf __attribute__((vector_size (8))); > > > +typedef short v4hi __attribute__((vector_size (8))); > > > +typedef short v8hi __attribute__((vector_size (16))); > > > + > > > +#define PERM_CONST_INTERLEAVE_v32hi \ > > > +0, 16, 1, 17, 2, 18, 3, 19, \ > > > +4, 20, 5, 21, 6, 22, 7, 23, \ > > > +8, 24, 9, 25, 10, 26, 
11, 27, \ > > > +12, 28, 13, 29, 14, 30, 15, 31 > > > + > > > +#define PERM_CONST_INTERLEAVE_v16hi \ > > > +0, 8, 1, 9, 2, 10, 3, 11, \ > > > +4, 12, 5, 13, 6, 14, 7, 15 > > > + > > > +#define PERM_CONST_INTERLEAVE_v8hi \ > > > +0, 4, 1, 5, 2, 6, 3, 7 > > > + > > > +#define PERM_CONST_TRUNCATE_v32hi \ > > > +0, 2, 4, 6, 8, 10, 12, 14, \ > > > +16, 18, 20, 22, 24, 26, 28, 30 > > > + > > > +#define PERM_CONST_TRUNCATE_v16hi \ > > > +0, 2, 4, 6, 8, 10, 12, 14 > > > + > > > +#define PERM_CONST_TRUNCATE_v8hi \ > > > +0, 2, 4, 6 > > > + > > > +#define PERM_CONST_INTERLEAVE(size) \ > > > + PERM_CONST_INTERLEAVE_v##size##hi > > > + > > > +#define PERM_CONST_TRUNCATE(size) \ > > > + PERM_CONST_TRUNCATE_v##size##hi > > > + > > > +#define SHUFFLE_CONST_INTERLEAVE(type, rtype, size) \ > > > +rtype foo_##type##shufflevector_const_interleave (type a) \ > > > +{ \ > > > + return __builtin_shufflevector (a, (type) {}, \ > > > + PERM_CONST_INTERLEAVE (size)); \ > > > +} \ > > > +type foo_##type##shufflevector_const_trunc (rtype a) \ > > > +{ \ > > > + return __builtin_shufflevector (a, a, \ > > > + PERM_CONST_TRUNCATE (size)); \ > > > +} > > > + > > > +SHUFFLE_CONST_INTERLEAVE (v16hf, v32hf, 32) > > > +SHUFFLE_CONST_INTERLEAVE (v8hf, v16hf, 16) > > > +SHUFFLE_CONST_INTERLEAVE (v4hf, v8hf, 8) > > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > > new file mode 100644 > > > index 00000000000..bfe11236eef > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > > @@ -0,0 +1,61 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > > +/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */ > > > +/* { dg-final { scan-assembler-times "vmovq" 2 } } */ > > > + > > > +typedef _Float16 v32hf __attribute__((vector_size (64))); > > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > > +typedef _Float16 v8hf __attribute__((vector_size 
(16))); > > > +typedef short v32hi __attribute__((vector_size (64))); > > > +typedef short v16hi __attribute__((vector_size (32))); > > > +typedef short v8hi __attribute__((vector_size (16))); > > > + > > > + > > > +#define PERM_CONST_CONCAT0_v32hi \ > > > +{ 0, 1, 2, 3, 4, 5, 6, 7, \ > > > + 8, 9, 10, 11, 12, 13, 14, 15, \ > > > + 34, 53, 41, 55, 57, 43, 36, 39, \ > > > + 62, 48, 50, 51, 49, 44, 60, 37 } > > > + > > > +#define PERM_CONST_CONCAT0_v32hi_l \ > > > +{ 32, 33, 34, 35, 36, 37, 38, 39, \ > > > + 40, 41, 42, 43, 44, 45, 46, 47, \ > > > + 31, 0, 29, 2, 27, 4, 25, 6, 23, \ > > > + 8, 21, 10, 19, 12, 17, 14 } > > > + > > > +#define PERM_CONST_CONCAT0_v16hi \ > > > +{ 0, 1, 2, 3, 4, 5, 6, 7, \ > > > + 21, 26, 17, 31, 24, 22, 30, 19 } > > > + > > > +#define PERM_CONST_CONCAT0_v16hi_l \ > > > +{ 16, 17, 18, 19, 20, 21, 22, 23, \ > > > + 15, 0, 13, 2, 11, 4, 9, 6 } > > > + > > > +#define PERM_CONST_CONCAT0_v8hi \ > > > +{ 0, 1, 2, 3, 9, 11, 14, 12 } > > > + > > > +#define PERM_CONST_CONCAT0_v8hi_l \ > > > +{ 8, 9, 10, 11, 3, 5, 1, 7 } > > > + > > > +#define PERM_CONST_CONCAT0(type) \ > > > + PERM_CONST_CONCAT0_##type > > > + > > > +#define PERM_CONST_CONCAT0_L(type) \ > > > + PERM_CONST_CONCAT0_##type##_l > > > + > > > +#define SHUFFLE_CONST_CONCAT0(type, itype) \ > > > +type foo_##type##shuffle_const_concat0 (type a) \ > > > +{ \ > > > + return __builtin_shuffle (a, (type) {0}, \ > > > + (itype) PERM_CONST_CONCAT0 (itype)); \ > > > +} \ > > > +type foo_##type##shuffle_const_concat0_l (type a) \ > > > +{ \ > > > + return __builtin_shuffle ((type) {0}, a, \ > > > + (itype) PERM_CONST_CONCAT0_L (itype)); \ > > > +} > > > + > > > +SHUFFLE_CONST_CONCAT0 (v32hf, v32hi) > > > +SHUFFLE_CONST_CONCAT0 (v16hf, v16hi) > > > +SHUFFLE_CONST_CONCAT0 (v8hf, v8hi) > > > + > > > -- > > > 2.18.1 > > > > > > > > > -- > > BR, > > Hongtao
> ix86_expand_vec_perm is only called by (define_expand "vec_perm<mode>") > which means target, op0 and op1 must exist, and you can drop > if(target/op0/op1) stuff. Yes, dropped. > Those checks for NULL seem reasonable according to the documentation, > op0, op1 and target may be NULL. Thanks for pointing it out; I didn't realize the difference between these two functions. Updated patch. Hongtao Liu <crazylht@gmail.com> 于2021年10月15日周五 下午1:54写道: > > On Fri, Oct 15, 2021 at 1:37 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote: > > > > > This part seems not related to vector shuffle. > > Yes, have separated this part to another patch and checked-in. > > > > Updated patch. Ok for this one? > > > > Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> 于2021年10月14日周四 下午2:33写道: > > > > > > On Thu, Oct 14, 2021 at 10:39 AM Hongyu Wang via Gcc-patches > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > Hi, > > > > > > > > This patch supports HFmode vector shuffle by creating HImode subreg when > > > > expanding permutation expr. > > > > > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} and sde{-m32,} > > > > OK for master? > > > > > > > > gcc/ChangeLog: > > > > > > > > * config/i386/i386-expand.c (ix86_expand_vec_perm): Convert > > > > HFmode input operand to HImode. > > > > (ix86_vectorize_vec_perm_const): Likewise. > > > > (ix86_expand_vector_init): Allow HFmode for one_operand_shuffle. > > > > * config/i386/sse.md (*avx512bw_permvar_truncv16siv16hi_1_hf): > > > > New define_insn. > > > > (*avx512f_permvar_truncv8siv8hi_1_hf): > > > > Likewise. > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > * gcc.target/i386/avx512fp16-builtin_shuffle-1.c: New test. > > > > * gcc.target/i386/avx512fp16-pr101846.c: Ditto. > > > > * gcc.target/i386/avx512fp16-pr94680.c: Ditto. 
> > > > --- > > > > gcc/config/i386/i386-expand.c | 29 ++++++- > > > > gcc/config/i386/sse.md | 54 +++++++++++- > > > > .../i386/avx512fp16-builtin_shuffle-1.c | 86 +++++++++++++++++++ > > > > .../gcc.target/i386/avx512fp16-pr101846.c | 56 ++++++++++++ > > > > .../gcc.target/i386/avx512fp16-pr94680.c | 61 +++++++++++++ > > > > 5 files changed, 284 insertions(+), 2 deletions(-) > > > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > > > > > > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > > > > index c0924a59efb..0f50ed3b9f8 100644 > > > > --- a/gcc/config/i386/i386-expand.c > > > > +++ b/gcc/config/i386/i386-expand.c > > > > @@ -4836,6 +4836,18 @@ ix86_expand_vec_perm (rtx operands[]) > > > > e = GET_MODE_UNIT_SIZE (mode); > > > > gcc_assert (w <= 64); > > > > > > > > + if (GET_MODE_INNER (mode) == HFmode) > > > > + { > > > > + machine_mode orig_mode = mode; > > > > + mode = mode_for_vector (HImode, w).require (); > > > > + if (target) > > > > + target = lowpart_subreg (mode, target, orig_mode); > > > > + if (op0) > > > > + op0 = lowpart_subreg (mode, op0, orig_mode); > > > > + if (op1) > > > > + op1 = lowpart_subreg (mode, op1, orig_mode); > > > > + } > > > > + > ix86_expand_vec_perm is only called by (define_expand "vec_perm<mode>" > which means target, op0 and op1 must existed, and you can drop > if(target/op0/op1) stuff. 
> > > > if (TARGET_AVX512F && one_operand_shuffle) > > > > { > > > > rtx (*gen) (rtx, rtx, rtx) = NULL; > > > > @@ -15092,7 +15104,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) > > > > rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; > > > > if (inner_mode == QImode > > > > || inner_mode == HImode > > > > - || inner_mode == TImode) > > > > + || inner_mode == TImode > > > > + || inner_mode == HFmode) > > > This part seems not related to vector shuffle. > > > > { > > > > unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); > > > > scalar_mode elt_mode = inner_mode == TImode ? DImode : SImode; > > > > @@ -21099,6 +21112,20 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, > > > > unsigned int i, nelt, which; > > > > bool two_args; > > > > > > > > + /* For HF mode vector, convert it to HI using subreg. */ > > > > + if (GET_MODE_INNER (vmode) == HFmode) > > > > + { > > > > + machine_mode orig_mode = vmode; > > > > + vmode = mode_for_vector (HImode, > > > > + GET_MODE_NUNITS (vmode)).require (); > > > > + if (target) > > > > + target = lowpart_subreg (vmode, target, orig_mode); > > > > + if (op0) > > > > + op0 = lowpart_subreg (vmode, op0, orig_mode); > > > > + if (op1) > > > > + op1 = lowpart_subreg (vmode, op1, orig_mode); > > > > + } > > > > + > Those checks for NULL seems reasonable according to documents, > op0,op1,target maybe NULL. > @deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST > (machine_mode @var{mode}, rtx @var{output}, rtx @var{in0}, rtx > @var{in1}, const vec_perm_indices @var{&sel}) > This hook is used to test whether the target can permute up to two > vectors of mode @var{mode} using the permutation vector @code{sel}, and > also to emit such a permutation. In the former case @var{in0}, @var{in1} > and @var{out} are all null. In the latter case @var{in0} and @var{in1} are > the source vectors and @var{out} is the destination vector; all three are > operands of mode @var{mode}. 
@var{in1} is the same as @var{in0} if > @var{sel} describes a permutation on one vector instead of two. > > > > d.target = target; > > > > d.op0 = op0; > > > > d.op1 = op1; > > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > > > index a3c4a3f1e62..d023d8a1c2e 100644 > > > > --- a/gcc/config/i386/sse.md > > > > +++ b/gcc/config/i386/sse.md > > > > @@ -12573,6 +12573,33 @@ > > > > (truncate:V16HI (match_dup 1)))] > > > > "operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode);") > > > > > > > > +(define_insn_and_split "*avx512bw_permvar_truncv16siv16hi_1_hf" > > > > + [(set (match_operand:V16HF 0 "nonimmediate_operand") > > > > + (vec_select:V16HF > > > > + (subreg:V32HF > > > > + (unspec:V32HI > > > > + [(match_operand:V32HI 1 "register_operand") > > > > + (match_operand:V32HI 2 "permvar_truncate_operand")] > > > > + UNSPEC_VPERMVAR) 0) > > > > + (parallel [(const_int 0) (const_int 1) > > > > + (const_int 2) (const_int 3) > > > > + (const_int 4) (const_int 5) > > > > + (const_int 6) (const_int 7) > > > > + (const_int 8) (const_int 9) > > > > + (const_int 10) (const_int 11) > > > > + (const_int 12) (const_int 13) > > > > + (const_int 14) (const_int 15)])))] > > > > + "TARGET_AVX512BW && ix86_pre_reload_split ()" > > > > + "#" > > > > + "&& 1" > > > > + [(set (match_dup 0) > > > > + (truncate:V16HI (match_dup 1)))] > > > > +{ > > > > + operands[0] = lowpart_subreg (V16HImode, operands[0], V16HFmode); > > > > + operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode); > > > > +}) > > > > + > > > > + > > > > (define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1" > > > > [(set (match_operand:V8HI 0 "nonimmediate_operand") > > > > (vec_select:V8HI > > > > @@ -12591,6 +12618,28 @@ > > > > (truncate:V8HI (match_dup 1)))] > > > > "operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode);") > > > > > > > > +(define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1_hf" > > > > + [(set (match_operand:V8HF 0 "nonimmediate_operand") 
> > > > + (vec_select:V8HF > > > > + (subreg:V16HF > > > > + (unspec:V16HI > > > > + [(match_operand:V16HI 1 "register_operand") > > > > + (match_operand:V16HI 2 "permvar_truncate_operand")] > > > > + UNSPEC_VPERMVAR) 0) > > > > + (parallel [(const_int 0) (const_int 1) > > > > + (const_int 2) (const_int 3) > > > > + (const_int 4) (const_int 5) > > > > + (const_int 6) (const_int 7)])))] > > > > + "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()" > > > > + "#" > > > > + "&& 1" > > > > + [(set (match_dup 0) > > > > + (truncate:V8HI (match_dup 1)))] > > > > +{ > > > > + operands[0] = lowpart_subreg (V8HImode, operands[0], V8HFmode); > > > > + operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode); > > > > +}) > > > > + > > > > (define_insn_and_split "*avx512f_vpermvar_truncv8div8si_1" > > > > [(set (match_operand:V8SI 0 "nonimmediate_operand") > > > > (vec_select:V8SI > > > > @@ -15603,12 +15652,15 @@ > > > > > > > > (define_mode_iterator VEC_PERM_AVX2 > > > > [V16QI V8HI V4SI V2DI V4SF V2DF > > > > + (V8HF "TARGET_AVX512FP16") > > > > (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") > > > > (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") > > > > (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2") > > > > + (V16HF "TARGET_AVX512FP16") > > > > (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") > > > > (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") > > > > - (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")]) > > > > + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI") > > > > + (V32HF "TARGET_AVX512FP16")]) > > > > > > > > (define_expand "vec_perm<mode>" > > > > [(match_operand:VEC_PERM_AVX2 0 "register_operand") > > > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > > > new file mode 100644 > > > > index 00000000000..89d3567a66b > > > > --- /dev/null > > > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > > > @@ -0,0 +1,86 @@ > > > > +/* { 
dg-do compile } */ > > > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > > > +/* { dg-final { scan-assembler-not "movw" } } */ > > > > +/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */ > > > > +/* { dg-final { scan-assembler-times "vpermw" 6 } } */ > > > > +/* { dg-final { scan-assembler-times "vpshufb" 3 } } */ > > > > +/* { dg-final { scan-assembler-times "vpermt2w" 6 } } */ > > > > + > > > > +typedef _Float16 v32hf __attribute__((vector_size (64))); > > > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > > > +typedef short v32hi __attribute__((vector_size (64))); > > > > +typedef short v16hi __attribute__((vector_size (32))); > > > > +typedef short v8hi __attribute__((vector_size (16))); > > > > + > > > > +#define PERM_CONST_RANDOM_v32hi \ > > > > +{ 0, 21, 15, 9, 43, 25, 37, 48, \ > > > > + 8, 16, 27, 51, 30, 12, 6, 46, \ > > > > + 34, 3, 11, 5, 17, 53, 26, 39, \ > > > > + 2, 18, 40, 61, 19, 4, 50, 29 } > > > > + > > > > +#define PERM_CONST_RANDOM_RANGE32_v32hi \ > > > > +{ 0, 21, 10, 23, 8, 18, 7, 19, \ > > > > + 4, 25, 3, 31, 5, 22, 11, 17, \ > > > > + 9, 20, 2, 24, 1, 30, 12, 27, \ > > > > + 13, 28, 6, 29, 14, 16, 15, 23 } > > > > + > > > > +#define PERM_CONST_RANDOM_v16hi \ > > > > +{ 0, 21, 15, 9, 13, 25, 30, 18, \ > > > > + 8, 16, 17, 11, 4, 22, 6, 7 } > > > > + > > > > +#define PERM_CONST_RANDOM_RANGE16_v16hi \ > > > > +{ 0, 9, 1, 12, 4, 15, 7, 13, \ > > > > + 3, 10, 6, 14, 5, 8, 2, 11 } > > > > + > > > > +#define PERM_CONST_RANDOM_v8hi \ > > > > +{ 0, 14, 15, 9, 13, 2, 3, 5 } > > > > + > > > > +#define PERM_CONST_RANDOM_RANGE8_v8hi \ > > > > +{ 0, 7, 2, 5, 3, 4, 1, 6 } > > > > + > > > > +#define PERM_CONST_RANDOM(size) \ > > > > + PERM_CONST_RANDOM_v##size##hi > > > > + > > > > +#define PERM_CONST_RANDOM_RANGE(size) \ > > > > + PERM_CONST_RANDOM_RANGE##size##_v##size##hi > > > > + > > > > +#define SHUFFLE_CONST_RANDOM(type, itype, size) \ > > > > +type 
foo_##type##shuffle_2param_const_random (type a, type b) \ > > > > +{ \ > > > > + return __builtin_shuffle (a, b, \ > > > > + (itype) PERM_CONST_RANDOM (size)); \ > > > > +} \ > > > > +type foo_##type##shuffle_2param_const_random_range (type a, type b) \ > > > > +{ \ > > > > + return __builtin_shuffle (a, b, \ > > > > + (itype) PERM_CONST_RANDOM_RANGE (size)); \ > > > > +} \ > > > > +type foo_##type##shuffle_1param_const_random (type a) \ > > > > +{ \ > > > > + return __builtin_shuffle (a, \ > > > > + (itype) PERM_CONST_RANDOM (size)); \ > > > > +} \ > > > > +type foo_##type##shuffle_1param_const_random_range (type a) \ > > > > +{ \ > > > > + return __builtin_shuffle (a, \ > > > > + (itype) PERM_CONST_RANDOM_RANGE (size)); \ > > > > +} > > > > + > > > > +#define SHUFFLE_VEC_INDEX(type, itype) \ > > > > +type foo##type##itype##shuffle_2param_vec (type a, type b, itype c) \ > > > > +{ \ > > > > + return __builtin_shuffle (a, b, c); \ > > > > +} \ > > > > +type foo##type##itype##shuffle_1param_vec (type a, itype c) \ > > > > +{ \ > > > > + return __builtin_shuffle (a, c); \ > > > > +} > > > > + > > > > +SHUFFLE_CONST_RANDOM (v32hf, v32hi, 32) > > > > +SHUFFLE_CONST_RANDOM (v16hf, v16hi, 16) > > > > +SHUFFLE_CONST_RANDOM (v8hf, v8hi, 8) > > > > + > > > > +SHUFFLE_VEC_INDEX (v32hf, v32hi) > > > > +SHUFFLE_VEC_INDEX (v16hf, v16hi) > > > > +SHUFFLE_VEC_INDEX (v8hf, v8hi) > > > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > > > new file mode 100644 > > > > index 00000000000..abd91561785 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > > > @@ -0,0 +1,56 @@ > > > > +/* { dg-do compile } */ > > > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > > > +/* { dg-final { scan-assembler-times "vpmovzxwd" "3" } } */ > > > > +/* { dg-final { scan-assembler-times "vpmovdw" "3" } } */ > > > > + > > > > +typedef _Float16 v32hf __attribute__((vector_size 
(64))); > > > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > > > +typedef _Float16 v4hf __attribute__((vector_size (8))); > > > > +typedef short v4hi __attribute__((vector_size (8))); > > > > +typedef short v8hi __attribute__((vector_size (16))); > > > > + > > > > +#define PERM_CONST_INTERLEAVE_v32hi \ > > > > +0, 16, 1, 17, 2, 18, 3, 19, \ > > > > +4, 20, 5, 21, 6, 22, 7, 23, \ > > > > +8, 24, 9, 25, 10, 26, 11, 27, \ > > > > +12, 28, 13, 29, 14, 30, 15, 31 > > > > + > > > > +#define PERM_CONST_INTERLEAVE_v16hi \ > > > > +0, 8, 1, 9, 2, 10, 3, 11, \ > > > > +4, 12, 5, 13, 6, 14, 7, 15 > > > > + > > > > +#define PERM_CONST_INTERLEAVE_v8hi \ > > > > +0, 4, 1, 5, 2, 6, 3, 7 > > > > + > > > > +#define PERM_CONST_TRUNCATE_v32hi \ > > > > +0, 2, 4, 6, 8, 10, 12, 14, \ > > > > +16, 18, 20, 22, 24, 26, 28, 30 > > > > + > > > > +#define PERM_CONST_TRUNCATE_v16hi \ > > > > +0, 2, 4, 6, 8, 10, 12, 14 > > > > + > > > > +#define PERM_CONST_TRUNCATE_v8hi \ > > > > +0, 2, 4, 6 > > > > + > > > > +#define PERM_CONST_INTERLEAVE(size) \ > > > > + PERM_CONST_INTERLEAVE_v##size##hi > > > > + > > > > +#define PERM_CONST_TRUNCATE(size) \ > > > > + PERM_CONST_TRUNCATE_v##size##hi > > > > + > > > > +#define SHUFFLE_CONST_INTERLEAVE(type, rtype, size) \ > > > > +rtype foo_##type##shufflevector_const_interleave (type a) \ > > > > +{ \ > > > > + return __builtin_shufflevector (a, (type) {}, \ > > > > + PERM_CONST_INTERLEAVE (size)); \ > > > > +} \ > > > > +type foo_##type##shufflevector_const_trunc (rtype a) \ > > > > +{ \ > > > > + return __builtin_shufflevector (a, a, \ > > > > + PERM_CONST_TRUNCATE (size)); \ > > > > +} > > > > + > > > > +SHUFFLE_CONST_INTERLEAVE (v16hf, v32hf, 32) > > > > +SHUFFLE_CONST_INTERLEAVE (v8hf, v16hf, 16) > > > > +SHUFFLE_CONST_INTERLEAVE (v4hf, v8hf, 8) > > > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > 
> > > new file mode 100644 > > > > index 00000000000..bfe11236eef > > > > --- /dev/null > > > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > > > @@ -0,0 +1,61 @@ > > > > +/* { dg-do compile } */ > > > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > > > +/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */ > > > > +/* { dg-final { scan-assembler-times "vmovq" 2 } } */ > > > > + > > > > +typedef _Float16 v32hf __attribute__((vector_size (64))); > > > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > > > +typedef short v32hi __attribute__((vector_size (64))); > > > > +typedef short v16hi __attribute__((vector_size (32))); > > > > +typedef short v8hi __attribute__((vector_size (16))); > > > > + > > > > + > > > > +#define PERM_CONST_CONCAT0_v32hi \ > > > > +{ 0, 1, 2, 3, 4, 5, 6, 7, \ > > > > + 8, 9, 10, 11, 12, 13, 14, 15, \ > > > > + 34, 53, 41, 55, 57, 43, 36, 39, \ > > > > + 62, 48, 50, 51, 49, 44, 60, 37 } > > > > + > > > > +#define PERM_CONST_CONCAT0_v32hi_l \ > > > > +{ 32, 33, 34, 35, 36, 37, 38, 39, \ > > > > + 40, 41, 42, 43, 44, 45, 46, 47, \ > > > > + 31, 0, 29, 2, 27, 4, 25, 6, 23, \ > > > > + 8, 21, 10, 19, 12, 17, 14 } > > > > + > > > > +#define PERM_CONST_CONCAT0_v16hi \ > > > > +{ 0, 1, 2, 3, 4, 5, 6, 7, \ > > > > + 21, 26, 17, 31, 24, 22, 30, 19 } > > > > + > > > > +#define PERM_CONST_CONCAT0_v16hi_l \ > > > > +{ 16, 17, 18, 19, 20, 21, 22, 23, \ > > > > + 15, 0, 13, 2, 11, 4, 9, 6 } > > > > + > > > > +#define PERM_CONST_CONCAT0_v8hi \ > > > > +{ 0, 1, 2, 3, 9, 11, 14, 12 } > > > > + > > > > +#define PERM_CONST_CONCAT0_v8hi_l \ > > > > +{ 8, 9, 10, 11, 3, 5, 1, 7 } > > > > + > > > > +#define PERM_CONST_CONCAT0(type) \ > > > > + PERM_CONST_CONCAT0_##type > > > > + > > > > +#define PERM_CONST_CONCAT0_L(type) \ > > > > + PERM_CONST_CONCAT0_##type##_l > > > > + > > > > +#define SHUFFLE_CONST_CONCAT0(type, itype) \ > > > > +type 
foo_##type##shuffle_const_concat0 (type a) \ > > > > +{ \ > > > > + return __builtin_shuffle (a, (type) {0}, \ > > > > + (itype) PERM_CONST_CONCAT0 (itype)); \ > > > > +} \ > > > > +type foo_##type##shuffle_const_concat0_l (type a) \ > > > > +{ \ > > > > + return __builtin_shuffle ((type) {0}, a, \ > > > > + (itype) PERM_CONST_CONCAT0_L (itype)); \ > > > > +} > > > > + > > > > +SHUFFLE_CONST_CONCAT0 (v32hf, v32hi) > > > > +SHUFFLE_CONST_CONCAT0 (v16hf, v16hi) > > > > +SHUFFLE_CONST_CONCAT0 (v8hf, v8hi) > > > > + > > > > -- > > > > 2.18.1 > > > > > > > > > > > > > -- > > > BR, > > > Hongtao > > > > -- > BR, > Hongtao
On Fri, Oct 15, 2021 at 2:15 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote: > > > ix86_expand_vec_perm is only called by (define_expand "vec_perm<mode>") > > which means target, op0 and op1 must exist, and you can drop > > if(target/op0/op1) stuff. > > Yes, dropped. > > > Those checks for NULL seem reasonable according to the documentation, > > op0, op1 and target may be NULL. > Thanks for pointing it out; I didn't realize the difference between > these two functions. LGTM. > > Updated patch. > > Hongtao Liu <crazylht@gmail.com> 于2021年10月15日周五 下午1:54写道: > > > > On Fri, Oct 15, 2021 at 1:37 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote: > > > > > > > This part seems not related to vector shuffle. > > > Yes, have separated this part to another patch and checked-in. > > > > > > Updated patch. Ok for this one? > > > > > > Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> 于2021年10月14日周四 下午2:33写道: > > > > > > > > On Thu, Oct 14, 2021 at 10:39 AM Hongyu Wang via Gcc-patches > > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > > > Hi, > > > > > > > > > > This patch supports HFmode vector shuffle by creating HImode subreg when > > > > > expanding permutation expr. > > > > > > > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} and sde{-m32,} > > > > > OK for master? > > > > > > > > > > gcc/ChangeLog: > > > > > > > > > > * config/i386/i386-expand.c (ix86_expand_vec_perm): Convert > > > > > HFmode input operand to HImode. > > > > > (ix86_vectorize_vec_perm_const): Likewise. > > > > > (ix86_expand_vector_init): Allow HFmode for one_operand_shuffle. > > > > > * config/i386/sse.md (*avx512bw_permvar_truncv16siv16hi_1_hf): > > > > > New define_insn. > > > > > (*avx512f_permvar_truncv8siv8hi_1_hf): > > > > > Likewise. > > > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > > > * gcc.target/i386/avx512fp16-builtin_shuffle-1.c: New test. > > > > > * gcc.target/i386/avx512fp16-pr101846.c: Ditto. > > > > > * gcc.target/i386/avx512fp16-pr94680.c: Ditto. 
> > > > > --- > > > > > gcc/config/i386/i386-expand.c | 29 ++++++- > > > > > gcc/config/i386/sse.md | 54 +++++++++++- > > > > > .../i386/avx512fp16-builtin_shuffle-1.c | 86 +++++++++++++++++++ > > > > > .../gcc.target/i386/avx512fp16-pr101846.c | 56 ++++++++++++ > > > > > .../gcc.target/i386/avx512fp16-pr94680.c | 61 +++++++++++++ > > > > > 5 files changed, 284 insertions(+), 2 deletions(-) > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > > > > > > > > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > > > > > index c0924a59efb..0f50ed3b9f8 100644 > > > > > --- a/gcc/config/i386/i386-expand.c > > > > > +++ b/gcc/config/i386/i386-expand.c > > > > > @@ -4836,6 +4836,18 @@ ix86_expand_vec_perm (rtx operands[]) > > > > > e = GET_MODE_UNIT_SIZE (mode); > > > > > gcc_assert (w <= 64); > > > > > > > > > > + if (GET_MODE_INNER (mode) == HFmode) > > > > > + { > > > > > + machine_mode orig_mode = mode; > > > > > + mode = mode_for_vector (HImode, w).require (); > > > > > + if (target) > > > > > + target = lowpart_subreg (mode, target, orig_mode); > > > > > + if (op0) > > > > > + op0 = lowpart_subreg (mode, op0, orig_mode); > > > > > + if (op1) > > > > > + op1 = lowpart_subreg (mode, op1, orig_mode); > > > > > + } > > > > > + > > ix86_expand_vec_perm is only called by (define_expand "vec_perm<mode>" > > which means target, op0 and op1 must existed, and you can drop > > if(target/op0/op1) stuff. 
> > > > > if (TARGET_AVX512F && one_operand_shuffle) > > > > > { > > > > > rtx (*gen) (rtx, rtx, rtx) = NULL; > > > > > @@ -15092,7 +15104,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) > > > > > rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; > > > > > if (inner_mode == QImode > > > > > || inner_mode == HImode > > > > > - || inner_mode == TImode) > > > > > + || inner_mode == TImode > > > > > + || inner_mode == HFmode) > > > > This part does not seem related to vector shuffle. > > > > > { > > > > > unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); > > > > > scalar_mode elt_mode = inner_mode == TImode ? DImode : SImode; > > > > > @@ -21099,6 +21112,20 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, > > > > > unsigned int i, nelt, which; > > > > > bool two_args; > > > > > > > > > > + /* For HF mode vector, convert it to HI using subreg. */ > > > > > + if (GET_MODE_INNER (vmode) == HFmode) > > > > > + { > > > > > + machine_mode orig_mode = vmode; > > > > > + vmode = mode_for_vector (HImode, > > > > > + GET_MODE_NUNITS (vmode)).require (); > > > > > + if (target) > > > > > + target = lowpart_subreg (vmode, target, orig_mode); > > > > > + if (op0) > > > > > + op0 = lowpart_subreg (vmode, op0, orig_mode); > > > > > + if (op1) > > > > > + op1 = lowpart_subreg (vmode, op1, orig_mode); > > > > > + } > > > > > + > > Those checks for NULL seem reasonable according to the documentation, > > op0, op1 and target may be NULL. > > @deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST > > (machine_mode @var{mode}, rtx @var{output}, rtx @var{in0}, rtx > > @var{in1}, const vec_perm_indices @var{&sel}) > > This hook is used to test whether the target can permute up to two > > vectors of mode @var{mode} using the permutation vector @code{sel}, and > > also to emit such a permutation. In the former case @var{in0}, @var{in1} > > and @var{out} are all null. 
In the latter case @var{in0} and @var{in1} are > > the source vectors and @var{out} is the destination vector; all three are > > operands of mode @var{mode}. @var{in1} is the same as @var{in0} if > > @var{sel} describes a permutation on one vector instead of two. > > > > > d.target = target; > > > > > d.op0 = op0; > > > > > d.op1 = op1; > > > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > > > > index a3c4a3f1e62..d023d8a1c2e 100644 > > > > > --- a/gcc/config/i386/sse.md > > > > > +++ b/gcc/config/i386/sse.md > > > > > @@ -12573,6 +12573,33 @@ > > > > > (truncate:V16HI (match_dup 1)))] > > > > > "operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode);") > > > > > > > > > > +(define_insn_and_split "*avx512bw_permvar_truncv16siv16hi_1_hf" > > > > > + [(set (match_operand:V16HF 0 "nonimmediate_operand") > > > > > + (vec_select:V16HF > > > > > + (subreg:V32HF > > > > > + (unspec:V32HI > > > > > + [(match_operand:V32HI 1 "register_operand") > > > > > + (match_operand:V32HI 2 "permvar_truncate_operand")] > > > > > + UNSPEC_VPERMVAR) 0) > > > > > + (parallel [(const_int 0) (const_int 1) > > > > > + (const_int 2) (const_int 3) > > > > > + (const_int 4) (const_int 5) > > > > > + (const_int 6) (const_int 7) > > > > > + (const_int 8) (const_int 9) > > > > > + (const_int 10) (const_int 11) > > > > > + (const_int 12) (const_int 13) > > > > > + (const_int 14) (const_int 15)])))] > > > > > + "TARGET_AVX512BW && ix86_pre_reload_split ()" > > > > > + "#" > > > > > + "&& 1" > > > > > + [(set (match_dup 0) > > > > > + (truncate:V16HI (match_dup 1)))] > > > > > +{ > > > > > + operands[0] = lowpart_subreg (V16HImode, operands[0], V16HFmode); > > > > > + operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode); > > > > > +}) > > > > > + > > > > > + > > > > > (define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1" > > > > > [(set (match_operand:V8HI 0 "nonimmediate_operand") > > > > > (vec_select:V8HI > > > > > @@ -12591,6 +12618,28 @@ > > > > > 
(truncate:V8HI (match_dup 1)))] > > > > > "operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode);") > > > > > > > > > > +(define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1_hf" > > > > > + [(set (match_operand:V8HF 0 "nonimmediate_operand") > > > > > + (vec_select:V8HF > > > > > + (subreg:V16HF > > > > > + (unspec:V16HI > > > > > + [(match_operand:V16HI 1 "register_operand") > > > > > + (match_operand:V16HI 2 "permvar_truncate_operand")] > > > > > + UNSPEC_VPERMVAR) 0) > > > > > + (parallel [(const_int 0) (const_int 1) > > > > > + (const_int 2) (const_int 3) > > > > > + (const_int 4) (const_int 5) > > > > > + (const_int 6) (const_int 7)])))] > > > > > + "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()" > > > > > + "#" > > > > > + "&& 1" > > > > > + [(set (match_dup 0) > > > > > + (truncate:V8HI (match_dup 1)))] > > > > > +{ > > > > > + operands[0] = lowpart_subreg (V8HImode, operands[0], V8HFmode); > > > > > + operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode); > > > > > +}) > > > > > + > > > > > (define_insn_and_split "*avx512f_vpermvar_truncv8div8si_1" > > > > > [(set (match_operand:V8SI 0 "nonimmediate_operand") > > > > > (vec_select:V8SI > > > > > @@ -15603,12 +15652,15 @@ > > > > > > > > > > (define_mode_iterator VEC_PERM_AVX2 > > > > > [V16QI V8HI V4SI V2DI V4SF V2DF > > > > > + (V8HF "TARGET_AVX512FP16") > > > > > (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") > > > > > (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") > > > > > (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2") > > > > > + (V16HF "TARGET_AVX512FP16") > > > > > (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") > > > > > (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") > > > > > - (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")]) > > > > > + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI") > > > > > + (V32HF "TARGET_AVX512FP16")]) > > > > > > > > > > (define_expand "vec_perm<mode>" > > > > > [(match_operand:VEC_PERM_AVX2 0 "register_operand") > > > > > diff 
--git a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > > > > new file mode 100644 > > > > > index 00000000000..89d3567a66b > > > > > --- /dev/null > > > > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c > > > > > @@ -0,0 +1,86 @@ > > > > > +/* { dg-do compile } */ > > > > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > > > > +/* { dg-final { scan-assembler-not "movw" } } */ > > > > > +/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */ > > > > > +/* { dg-final { scan-assembler-times "vpermw" 6 } } */ > > > > > +/* { dg-final { scan-assembler-times "vpshufb" 3 } } */ > > > > > +/* { dg-final { scan-assembler-times "vpermt2w" 6 } } */ > > > > > + > > > > > +typedef _Float16 v32hf __attribute__((vector_size (64))); > > > > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > > > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > > > > +typedef short v32hi __attribute__((vector_size (64))); > > > > > +typedef short v16hi __attribute__((vector_size (32))); > > > > > +typedef short v8hi __attribute__((vector_size (16))); > > > > > + > > > > > +#define PERM_CONST_RANDOM_v32hi \ > > > > > +{ 0, 21, 15, 9, 43, 25, 37, 48, \ > > > > > + 8, 16, 27, 51, 30, 12, 6, 46, \ > > > > > + 34, 3, 11, 5, 17, 53, 26, 39, \ > > > > > + 2, 18, 40, 61, 19, 4, 50, 29 } > > > > > + > > > > > +#define PERM_CONST_RANDOM_RANGE32_v32hi \ > > > > > +{ 0, 21, 10, 23, 8, 18, 7, 19, \ > > > > > + 4, 25, 3, 31, 5, 22, 11, 17, \ > > > > > + 9, 20, 2, 24, 1, 30, 12, 27, \ > > > > > + 13, 28, 6, 29, 14, 16, 15, 23 } > > > > > + > > > > > +#define PERM_CONST_RANDOM_v16hi \ > > > > > +{ 0, 21, 15, 9, 13, 25, 30, 18, \ > > > > > + 8, 16, 17, 11, 4, 22, 6, 7 } > > > > > + > > > > > +#define PERM_CONST_RANDOM_RANGE16_v16hi \ > > > > > +{ 0, 9, 1, 12, 4, 15, 7, 13, \ > > > > > + 3, 10, 6, 14, 5, 8, 2, 11 } > > > > > + > > > > > +#define PERM_CONST_RANDOM_v8hi \ > > > > 
> +{ 0, 14, 15, 9, 13, 2, 3, 5 } > > > > > + > > > > > +#define PERM_CONST_RANDOM_RANGE8_v8hi \ > > > > > +{ 0, 7, 2, 5, 3, 4, 1, 6 } > > > > > + > > > > > +#define PERM_CONST_RANDOM(size) \ > > > > > + PERM_CONST_RANDOM_v##size##hi > > > > > + > > > > > +#define PERM_CONST_RANDOM_RANGE(size) \ > > > > > + PERM_CONST_RANDOM_RANGE##size##_v##size##hi > > > > > + > > > > > +#define SHUFFLE_CONST_RANDOM(type, itype, size) \ > > > > > +type foo_##type##shuffle_2param_const_random (type a, type b) \ > > > > > +{ \ > > > > > + return __builtin_shuffle (a, b, \ > > > > > + (itype) PERM_CONST_RANDOM (size)); \ > > > > > +} \ > > > > > +type foo_##type##shuffle_2param_const_random_range (type a, type b) \ > > > > > +{ \ > > > > > + return __builtin_shuffle (a, b, \ > > > > > + (itype) PERM_CONST_RANDOM_RANGE (size)); \ > > > > > +} \ > > > > > +type foo_##type##shuffle_1param_const_random (type a) \ > > > > > +{ \ > > > > > + return __builtin_shuffle (a, \ > > > > > + (itype) PERM_CONST_RANDOM (size)); \ > > > > > +} \ > > > > > +type foo_##type##shuffle_1param_const_random_range (type a) \ > > > > > +{ \ > > > > > + return __builtin_shuffle (a, \ > > > > > + (itype) PERM_CONST_RANDOM_RANGE (size)); \ > > > > > +} > > > > > + > > > > > +#define SHUFFLE_VEC_INDEX(type, itype) \ > > > > > +type foo##type##itype##shuffle_2param_vec (type a, type b, itype c) \ > > > > > +{ \ > > > > > + return __builtin_shuffle (a, b, c); \ > > > > > +} \ > > > > > +type foo##type##itype##shuffle_1param_vec (type a, itype c) \ > > > > > +{ \ > > > > > + return __builtin_shuffle (a, c); \ > > > > > +} > > > > > + > > > > > +SHUFFLE_CONST_RANDOM (v32hf, v32hi, 32) > > > > > +SHUFFLE_CONST_RANDOM (v16hf, v16hi, 16) > > > > > +SHUFFLE_CONST_RANDOM (v8hf, v8hi, 8) > > > > > + > > > > > +SHUFFLE_VEC_INDEX (v32hf, v32hi) > > > > > +SHUFFLE_VEC_INDEX (v16hf, v16hi) > > > > > +SHUFFLE_VEC_INDEX (v8hf, v8hi) > > > > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c 
b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > > > > new file mode 100644 > > > > > index 00000000000..abd91561785 > > > > > --- /dev/null > > > > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c > > > > > @@ -0,0 +1,56 @@ > > > > > +/* { dg-do compile } */ > > > > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > > > > +/* { dg-final { scan-assembler-times "vpmovzxwd" "3" } } */ > > > > > +/* { dg-final { scan-assembler-times "vpmovdw" "3" } } */ > > > > > + > > > > > +typedef _Float16 v32hf __attribute__((vector_size (64))); > > > > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > > > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > > > > +typedef _Float16 v4hf __attribute__((vector_size (8))); > > > > > +typedef short v4hi __attribute__((vector_size (8))); > > > > > +typedef short v8hi __attribute__((vector_size (16))); > > > > > + > > > > > +#define PERM_CONST_INTERLEAVE_v32hi \ > > > > > +0, 16, 1, 17, 2, 18, 3, 19, \ > > > > > +4, 20, 5, 21, 6, 22, 7, 23, \ > > > > > +8, 24, 9, 25, 10, 26, 11, 27, \ > > > > > +12, 28, 13, 29, 14, 30, 15, 31 > > > > > + > > > > > +#define PERM_CONST_INTERLEAVE_v16hi \ > > > > > +0, 8, 1, 9, 2, 10, 3, 11, \ > > > > > +4, 12, 5, 13, 6, 14, 7, 15 > > > > > + > > > > > +#define PERM_CONST_INTERLEAVE_v8hi \ > > > > > +0, 4, 1, 5, 2, 6, 3, 7 > > > > > + > > > > > +#define PERM_CONST_TRUNCATE_v32hi \ > > > > > +0, 2, 4, 6, 8, 10, 12, 14, \ > > > > > +16, 18, 20, 22, 24, 26, 28, 30 > > > > > + > > > > > +#define PERM_CONST_TRUNCATE_v16hi \ > > > > > +0, 2, 4, 6, 8, 10, 12, 14 > > > > > + > > > > > +#define PERM_CONST_TRUNCATE_v8hi \ > > > > > +0, 2, 4, 6 > > > > > + > > > > > +#define PERM_CONST_INTERLEAVE(size) \ > > > > > + PERM_CONST_INTERLEAVE_v##size##hi > > > > > + > > > > > +#define PERM_CONST_TRUNCATE(size) \ > > > > > + PERM_CONST_TRUNCATE_v##size##hi > > > > > + > > > > > +#define SHUFFLE_CONST_INTERLEAVE(type, rtype, size) \ > > > > > +rtype 
foo_##type##shufflevector_const_interleave (type a) \ > > > > > +{ \ > > > > > + return __builtin_shufflevector (a, (type) {}, \ > > > > > + PERM_CONST_INTERLEAVE (size)); \ > > > > > +} \ > > > > > +type foo_##type##shufflevector_const_trunc (rtype a) \ > > > > > +{ \ > > > > > + return __builtin_shufflevector (a, a, \ > > > > > + PERM_CONST_TRUNCATE (size)); \ > > > > > +} > > > > > + > > > > > +SHUFFLE_CONST_INTERLEAVE (v16hf, v32hf, 32) > > > > > +SHUFFLE_CONST_INTERLEAVE (v8hf, v16hf, 16) > > > > > +SHUFFLE_CONST_INTERLEAVE (v4hf, v8hf, 8) > > > > > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > > > > new file mode 100644 > > > > > index 00000000000..bfe11236eef > > > > > --- /dev/null > > > > > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c > > > > > @@ -0,0 +1,61 @@ > > > > > +/* { dg-do compile } */ > > > > > +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ > > > > > +/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */ > > > > > +/* { dg-final { scan-assembler-times "vmovq" 2 } } */ > > > > > + > > > > > +typedef _Float16 v32hf __attribute__((vector_size (64))); > > > > > +typedef _Float16 v16hf __attribute__((vector_size (32))); > > > > > +typedef _Float16 v8hf __attribute__((vector_size (16))); > > > > > +typedef short v32hi __attribute__((vector_size (64))); > > > > > +typedef short v16hi __attribute__((vector_size (32))); > > > > > +typedef short v8hi __attribute__((vector_size (16))); > > > > > + > > > > > + > > > > > +#define PERM_CONST_CONCAT0_v32hi \ > > > > > +{ 0, 1, 2, 3, 4, 5, 6, 7, \ > > > > > + 8, 9, 10, 11, 12, 13, 14, 15, \ > > > > > + 34, 53, 41, 55, 57, 43, 36, 39, \ > > > > > + 62, 48, 50, 51, 49, 44, 60, 37 } > > > > > + > > > > > +#define PERM_CONST_CONCAT0_v32hi_l \ > > > > > +{ 32, 33, 34, 35, 36, 37, 38, 39, \ > > > > > + 40, 41, 42, 43, 44, 45, 46, 47, \ > > > > > + 31, 0, 29, 2, 27, 4, 25, 6, 23, \ > > > > > + 8, 21, 10, 19, 12, 17, 14 } > > 
> > > + > > > > > +#define PERM_CONST_CONCAT0_v16hi \ > > > > > +{ 0, 1, 2, 3, 4, 5, 6, 7, \ > > > > > + 21, 26, 17, 31, 24, 22, 30, 19 } > > > > > + > > > > > +#define PERM_CONST_CONCAT0_v16hi_l \ > > > > > +{ 16, 17, 18, 19, 20, 21, 22, 23, \ > > > > > + 15, 0, 13, 2, 11, 4, 9, 6 } > > > > > + > > > > > +#define PERM_CONST_CONCAT0_v8hi \ > > > > > +{ 0, 1, 2, 3, 9, 11, 14, 12 } > > > > > + > > > > > +#define PERM_CONST_CONCAT0_v8hi_l \ > > > > > +{ 8, 9, 10, 11, 3, 5, 1, 7 } > > > > > + > > > > > +#define PERM_CONST_CONCAT0(type) \ > > > > > + PERM_CONST_CONCAT0_##type > > > > > + > > > > > +#define PERM_CONST_CONCAT0_L(type) \ > > > > > + PERM_CONST_CONCAT0_##type##_l > > > > > + > > > > > +#define SHUFFLE_CONST_CONCAT0(type, itype) \ > > > > > +type foo_##type##shuffle_const_concat0 (type a) \ > > > > > +{ \ > > > > > + return __builtin_shuffle (a, (type) {0}, \ > > > > > + (itype) PERM_CONST_CONCAT0 (itype)); \ > > > > > +} \ > > > > > +type foo_##type##shuffle_const_concat0_l (type a) \ > > > > > +{ \ > > > > > + return __builtin_shuffle ((type) {0}, a, \ > > > > > + (itype) PERM_CONST_CONCAT0_L (itype)); \ > > > > > +} > > > > > + > > > > > +SHUFFLE_CONST_CONCAT0 (v32hf, v32hi) > > > > > +SHUFFLE_CONST_CONCAT0 (v16hf, v16hi) > > > > > +SHUFFLE_CONST_CONCAT0 (v8hf, v8hi) > > > > > + > > > > > -- > > > > > 2.18.1 > > > > > > > > > > > > > > > > > -- > > > > BR, > > > > Hongtao > > > > > > > > -- > > BR, > > Hongtao
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index c0924a59efb..0f50ed3b9f8 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -4836,6 +4836,18 @@ ix86_expand_vec_perm (rtx operands[]) e = GET_MODE_UNIT_SIZE (mode); gcc_assert (w <= 64); + if (GET_MODE_INNER (mode) == HFmode) + { + machine_mode orig_mode = mode; + mode = mode_for_vector (HImode, w).require (); + if (target) + target = lowpart_subreg (mode, target, orig_mode); + if (op0) + op0 = lowpart_subreg (mode, op0, orig_mode); + if (op1) + op1 = lowpart_subreg (mode, op1, orig_mode); + } + if (TARGET_AVX512F && one_operand_shuffle) { rtx (*gen) (rtx, rtx, rtx) = NULL; @@ -15092,7 +15104,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; if (inner_mode == QImode || inner_mode == HImode - || inner_mode == TImode) + || inner_mode == TImode + || inner_mode == HFmode) { unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); scalar_mode elt_mode = inner_mode == TImode ? DImode : SImode; @@ -21099,6 +21112,20 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, unsigned int i, nelt, which; bool two_args; + /* For HF mode vector, convert it to HI using subreg. 
*/ + if (GET_MODE_INNER (vmode) == HFmode) + { + machine_mode orig_mode = vmode; + vmode = mode_for_vector (HImode, + GET_MODE_NUNITS (vmode)).require (); + if (target) + target = lowpart_subreg (vmode, target, orig_mode); + if (op0) + op0 = lowpart_subreg (vmode, op0, orig_mode); + if (op1) + op1 = lowpart_subreg (vmode, op1, orig_mode); + } + d.target = target; d.op0 = op0; d.op1 = op1; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a3c4a3f1e62..d023d8a1c2e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -12573,6 +12573,33 @@ (truncate:V16HI (match_dup 1)))] "operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode);") +(define_insn_and_split "*avx512bw_permvar_truncv16siv16hi_1_hf" + [(set (match_operand:V16HF 0 "nonimmediate_operand") + (vec_select:V16HF + (subreg:V32HF + (unspec:V32HI + [(match_operand:V32HI 1 "register_operand") + (match_operand:V32HI 2 "permvar_truncate_operand")] + UNSPEC_VPERMVAR) 0) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)])))] + "TARGET_AVX512BW && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (truncate:V16HI (match_dup 1)))] +{ + operands[0] = lowpart_subreg (V16HImode, operands[0], V16HFmode); + operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode); +}) + + (define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1" [(set (match_operand:V8HI 0 "nonimmediate_operand") (vec_select:V8HI @@ -12591,6 +12618,28 @@ (truncate:V8HI (match_dup 1)))] "operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode);") +(define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1_hf" + [(set (match_operand:V8HF 0 "nonimmediate_operand") + (vec_select:V8HF + (subreg:V16HF + (unspec:V16HI + [(match_operand:V16HI 1 "register_operand") + (match_operand:V16HI 2 
"permvar_truncate_operand")] + UNSPEC_VPERMVAR) 0) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (truncate:V8HI (match_dup 1)))] +{ + operands[0] = lowpart_subreg (V8HImode, operands[0], V8HFmode); + operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode); +}) + (define_insn_and_split "*avx512f_vpermvar_truncv8div8si_1" [(set (match_operand:V8SI 0 "nonimmediate_operand") (vec_select:V8SI @@ -15603,12 +15652,15 @@ (define_mode_iterator VEC_PERM_AVX2 [V16QI V8HI V4SI V2DI V4SF V2DF + (V8HF "TARGET_AVX512FP16") (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2") + (V16HF "TARGET_AVX512FP16") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") - (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")]) + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI") + (V32HF "TARGET_AVX512FP16")]) (define_expand "vec_perm<mode>" [(match_operand:VEC_PERM_AVX2 0 "register_operand") diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c new file mode 100644 index 00000000000..89d3567a66b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c @@ -0,0 +1,86 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-not "movw" } } */ +/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */ +/* { dg-final { scan-assembler-times "vpermw" 6 } } */ +/* { dg-final { scan-assembler-times "vpshufb" 3 } } */ +/* { dg-final { scan-assembler-times "vpermt2w" 6 } } */ + +typedef _Float16 v32hf __attribute__((vector_size (64))); +typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef _Float16 v8hf 
__attribute__((vector_size (16))); +typedef short v32hi __attribute__((vector_size (64))); +typedef short v16hi __attribute__((vector_size (32))); +typedef short v8hi __attribute__((vector_size (16))); + +#define PERM_CONST_RANDOM_v32hi \ +{ 0, 21, 15, 9, 43, 25, 37, 48, \ + 8, 16, 27, 51, 30, 12, 6, 46, \ + 34, 3, 11, 5, 17, 53, 26, 39, \ + 2, 18, 40, 61, 19, 4, 50, 29 } + +#define PERM_CONST_RANDOM_RANGE32_v32hi \ +{ 0, 21, 10, 23, 8, 18, 7, 19, \ + 4, 25, 3, 31, 5, 22, 11, 17, \ + 9, 20, 2, 24, 1, 30, 12, 27, \ + 13, 28, 6, 29, 14, 16, 15, 23 } + +#define PERM_CONST_RANDOM_v16hi \ +{ 0, 21, 15, 9, 13, 25, 30, 18, \ + 8, 16, 17, 11, 4, 22, 6, 7 } + +#define PERM_CONST_RANDOM_RANGE16_v16hi \ +{ 0, 9, 1, 12, 4, 15, 7, 13, \ + 3, 10, 6, 14, 5, 8, 2, 11 } + +#define PERM_CONST_RANDOM_v8hi \ +{ 0, 14, 15, 9, 13, 2, 3, 5 } + +#define PERM_CONST_RANDOM_RANGE8_v8hi \ +{ 0, 7, 2, 5, 3, 4, 1, 6 } + +#define PERM_CONST_RANDOM(size) \ + PERM_CONST_RANDOM_v##size##hi + +#define PERM_CONST_RANDOM_RANGE(size) \ + PERM_CONST_RANDOM_RANGE##size##_v##size##hi + +#define SHUFFLE_CONST_RANDOM(type, itype, size) \ +type foo_##type##shuffle_2param_const_random (type a, type b) \ +{ \ + return __builtin_shuffle (a, b, \ + (itype) PERM_CONST_RANDOM (size)); \ +} \ +type foo_##type##shuffle_2param_const_random_range (type a, type b) \ +{ \ + return __builtin_shuffle (a, b, \ + (itype) PERM_CONST_RANDOM_RANGE (size)); \ +} \ +type foo_##type##shuffle_1param_const_random (type a) \ +{ \ + return __builtin_shuffle (a, \ + (itype) PERM_CONST_RANDOM (size)); \ +} \ +type foo_##type##shuffle_1param_const_random_range (type a) \ +{ \ + return __builtin_shuffle (a, \ + (itype) PERM_CONST_RANDOM_RANGE (size)); \ +} + +#define SHUFFLE_VEC_INDEX(type, itype) \ +type foo##type##itype##shuffle_2param_vec (type a, type b, itype c) \ +{ \ + return __builtin_shuffle (a, b, c); \ +} \ +type foo##type##itype##shuffle_1param_vec (type a, itype c) \ +{ \ + return __builtin_shuffle (a, c); \ +} + 
+SHUFFLE_CONST_RANDOM (v32hf, v32hi, 32) +SHUFFLE_CONST_RANDOM (v16hf, v16hi, 16) +SHUFFLE_CONST_RANDOM (v8hf, v8hi, 8) + +SHUFFLE_VEC_INDEX (v32hf, v32hi) +SHUFFLE_VEC_INDEX (v16hf, v16hi) +SHUFFLE_VEC_INDEX (v8hf, v8hi) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c new file mode 100644 index 00000000000..abd91561785 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vpmovzxwd" "3" } } */ +/* { dg-final { scan-assembler-times "vpmovdw" "3" } } */ + +typedef _Float16 v32hf __attribute__((vector_size (64))); +typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef _Float16 v8hf __attribute__((vector_size (16))); +typedef _Float16 v4hf __attribute__((vector_size (8))); +typedef short v4hi __attribute__((vector_size (8))); +typedef short v8hi __attribute__((vector_size (16))); + +#define PERM_CONST_INTERLEAVE_v32hi \ +0, 16, 1, 17, 2, 18, 3, 19, \ +4, 20, 5, 21, 6, 22, 7, 23, \ +8, 24, 9, 25, 10, 26, 11, 27, \ +12, 28, 13, 29, 14, 30, 15, 31 + +#define PERM_CONST_INTERLEAVE_v16hi \ +0, 8, 1, 9, 2, 10, 3, 11, \ +4, 12, 5, 13, 6, 14, 7, 15 + +#define PERM_CONST_INTERLEAVE_v8hi \ +0, 4, 1, 5, 2, 6, 3, 7 + +#define PERM_CONST_TRUNCATE_v32hi \ +0, 2, 4, 6, 8, 10, 12, 14, \ +16, 18, 20, 22, 24, 26, 28, 30 + +#define PERM_CONST_TRUNCATE_v16hi \ +0, 2, 4, 6, 8, 10, 12, 14 + +#define PERM_CONST_TRUNCATE_v8hi \ +0, 2, 4, 6 + +#define PERM_CONST_INTERLEAVE(size) \ + PERM_CONST_INTERLEAVE_v##size##hi + +#define PERM_CONST_TRUNCATE(size) \ + PERM_CONST_TRUNCATE_v##size##hi + +#define SHUFFLE_CONST_INTERLEAVE(type, rtype, size) \ +rtype foo_##type##shufflevector_const_interleave (type a) \ +{ \ + return __builtin_shufflevector (a, (type) {}, \ + PERM_CONST_INTERLEAVE (size)); \ +} \ +type foo_##type##shufflevector_const_trunc (rtype a) \ +{ \ + 
return __builtin_shufflevector (a, a, \ + PERM_CONST_TRUNCATE (size)); \ +} + +SHUFFLE_CONST_INTERLEAVE (v16hf, v32hf, 32) +SHUFFLE_CONST_INTERLEAVE (v8hf, v16hf, 16) +SHUFFLE_CONST_INTERLEAVE (v4hf, v8hf, 8) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c new file mode 100644 index 00000000000..bfe11236eef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */ +/* { dg-final { scan-assembler-times "vmovq" 2 } } */ + +typedef _Float16 v32hf __attribute__((vector_size (64))); +typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef _Float16 v8hf __attribute__((vector_size (16))); +typedef short v32hi __attribute__((vector_size (64))); +typedef short v16hi __attribute__((vector_size (32))); +typedef short v8hi __attribute__((vector_size (16))); + + +#define PERM_CONST_CONCAT0_v32hi \ +{ 0, 1, 2, 3, 4, 5, 6, 7, \ + 8, 9, 10, 11, 12, 13, 14, 15, \ + 34, 53, 41, 55, 57, 43, 36, 39, \ + 62, 48, 50, 51, 49, 44, 60, 37 } + +#define PERM_CONST_CONCAT0_v32hi_l \ +{ 32, 33, 34, 35, 36, 37, 38, 39, \ + 40, 41, 42, 43, 44, 45, 46, 47, \ + 31, 0, 29, 2, 27, 4, 25, 6, 23, \ + 8, 21, 10, 19, 12, 17, 14 } + +#define PERM_CONST_CONCAT0_v16hi \ +{ 0, 1, 2, 3, 4, 5, 6, 7, \ + 21, 26, 17, 31, 24, 22, 30, 19 } + +#define PERM_CONST_CONCAT0_v16hi_l \ +{ 16, 17, 18, 19, 20, 21, 22, 23, \ + 15, 0, 13, 2, 11, 4, 9, 6 } + +#define PERM_CONST_CONCAT0_v8hi \ +{ 0, 1, 2, 3, 9, 11, 14, 12 } + +#define PERM_CONST_CONCAT0_v8hi_l \ +{ 8, 9, 10, 11, 3, 5, 1, 7 } + +#define PERM_CONST_CONCAT0(type) \ + PERM_CONST_CONCAT0_##type + +#define PERM_CONST_CONCAT0_L(type) \ + PERM_CONST_CONCAT0_##type##_l + +#define SHUFFLE_CONST_CONCAT0(type, itype) \ +type foo_##type##shuffle_const_concat0 (type a) \ +{ \ + return __builtin_shuffle (a, (type) {0}, \ + (itype) 
PERM_CONST_CONCAT0 (itype)); \ +} \ +type foo_##type##shuffle_const_concat0_l (type a) \ +{ \ + return __builtin_shuffle ((type) {0}, a, \ + (itype) PERM_CONST_CONCAT0_L (itype)); \ +} + +SHUFFLE_CONST_CONCAT0 (v32hf, v32hi) +SHUFFLE_CONST_CONCAT0 (v16hf, v16hi) +SHUFFLE_CONST_CONCAT0 (v8hf, v8hi) +