Message ID | 20240704032429.629361-1-admin@levyhsu.com |
---|---|
State | New |
Headers | show |
Series | Support bitwise and/andnot/abs/neg/copysign/xorsign op for V8BF/V16BF/V32BF | expand |
On Thu, Jul 4, 2024 at 11:24 AM Levy Hsu <admin@levyhsu.com> wrote: > > This patch extends support for BF16 vector operations in GCC, including bitwise AND, ANDNOT, ABS, NEG, COPYSIGN, and XORSIGN for V8BF, V16BF, and V32BF modes. > Bootstrapped and tested on x86_64-linux-gnu. ok for trunk? > > gcc/ChangeLog: > > * config/i386/i386-expand.cc (ix86_expand_fp_absneg_operator): Add VBF modes. > (ix86_expand_copysign): Ditto. > (ix86_expand_xorsign): Ditto. > * config/i386/i386.cc (ix86_build_const_vector): Ditto. > (ix86_build_signbit_mask): Ditto. > * config/i386/sse.md: Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx2-bf16-vec-absneg.c: New test. > * gcc.target/i386/avx512f-bf16-vec-absneg.c: New test. > > --- > gcc/config/i386/i386-expand.cc | 76 +++++++++++------ > gcc/config/i386/i386.cc | 6 ++ > gcc/config/i386/sse.md | 37 +++++--- > .../gcc.target/i386/avx2-bf16-vec-absneg.c | 85 +++++++++++++++++++ > .../gcc.target/i386/avx512f-bf16-vec-absneg.c | 66 ++++++++++++++ > 5 files changed, 234 insertions(+), 36 deletions(-) > create mode 100755 gcc/testsuite/gcc.target/i386/avx2-bf16-vec-absneg.c > create mode 100755 gcc/testsuite/gcc.target/i386/avx512f-bf16-vec-absneg.c > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index 5c29ee1353f..46d13a55e6a 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -2175,20 +2175,28 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, > machine_mode vmode = mode; > rtvec par; > > - if (vector_mode || mode == TFmode || mode == HFmode) > - { > - use_sse = true; > - if (mode == HFmode) > - vmode = V8HFmode; > - } > - else if (TARGET_SSE_MATH) > - { > - use_sse = SSE_FLOAT_MODE_P (mode); > - if (mode == SFmode) > - vmode = V4SFmode; > - else if (mode == DFmode) > - vmode = V2DFmode; > - } > + switch (mode) > + { > + case HFmode: > + use_sse = true; > + vmode = V8HFmode; > + break; > + case BFmode: > + use_sse = true; > + vmode = 
V8BFmode; > + break; > + case SFmode: > + use_sse = TARGET_SSE_MATH; [Reviewer comment: this should be `use_sse = TARGET_SSE_MATH && TARGET_SSE;`] > + vmode = V4SFmode; > + break; > + case DFmode: > + use_sse = TARGET_SSE_MATH; [Reviewer comment: this should be `use_sse = TARGET_SSE_MATH && TARGET_SSE2;` Others LGTM.] > + vmode = V2DFmode; > + break; > + default: > + use_sse = vector_mode || mode == TFmode; > + break; > + } > > dst = operands[0]; > src = operands[1]; > @@ -2321,16 +2329,26 @@ ix86_expand_copysign (rtx operands[]) > > mode = GET_MODE (operands[0]); > > - if (mode == HFmode) > + switch (mode) > + { > + case HFmode: > vmode = V8HFmode; > - else if (mode == SFmode) > + break; > + case BFmode: > + vmode = V8BFmode; > + break; > + case SFmode: > vmode = V4SFmode; > - else if (mode == DFmode) > + break; > + case DFmode: > vmode = V2DFmode; > - else if (mode == TFmode) > + break; > + case TFmode: > vmode = mode; > - else > - gcc_unreachable (); > + break; > + default: > + gcc_unreachable(); > + } > > if (rtx_equal_p (operands[1], operands[2])) > { > @@ -2391,14 +2409,24 @@ ix86_expand_xorsign (rtx operands[]) > > mode = GET_MODE (dest); > > - if (mode == HFmode) > + switch (mode) > + { > + case HFmode: > vmode = V8HFmode; > - else if (mode == SFmode) > + break; > + case BFmode: > + vmode = V8BFmode; > + break; > + case SFmode: > vmode = V4SFmode; > - else if (mode == DFmode) > + break; > + case DFmode: > vmode = V2DFmode; > - else > + break; > + default: > gcc_unreachable (); > + break; > + } > > temp = gen_reg_rtx (vmode); > mask = ix86_build_signbit_mask (vmode, 0, 0); > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index d4ccc24be6e..b5768a65e52 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -16353,6 +16353,9 @@ ix86_build_const_vector (machine_mode mode, bool vect, rtx value) > case E_V8DFmode: > case E_V4DFmode: > case E_V2DFmode: > + case E_V32BFmode: > + case E_V16BFmode: > + case E_V8BFmode: > n_elt = GET_MODE_NUNITS (mode); > v = rtvec_alloc (n_elt); > scalar_mode = 
GET_MODE_INNER (mode); > @@ -16389,6 +16392,9 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) > case E_V8HFmode: > case E_V16HFmode: > case E_V32HFmode: > + case E_V32BFmode: > + case E_V16BFmode: > + case E_V8BFmode: > vec_mode = mode; > imode = HImode; > break; > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 0be2dcd8891..1703bbb4250 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -351,7 +351,9 @@ > > ;; 128-, 256- and 512-bit float vector modes for bitwise operations > (define_mode_iterator VFB > - [(V32HF "TARGET_AVX512F && TARGET_EVEX512") > + [(V32BF "TARGET_AVX512F && TARGET_EVEX512") > + (V16BF "TARGET_AVX") (V8BF "TARGET_SSE2") > + (V32HF "TARGET_AVX512F && TARGET_EVEX512") > (V16HF "TARGET_AVX") (V8HF "TARGET_SSE2") > (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF > (V8DF "TARGET_AVX512F && TARGET_EVEX512") > @@ -364,7 +366,8 @@ > > ;; 128- and 256-bit float vector modes for bitwise operations > (define_mode_iterator VFB_128_256 > - [(V16HF "TARGET_AVX") (V8HF "TARGET_SSE2") > + [(V16BF "TARGET_AVX") (V8BF "TARGET_SSE2") > + (V16HF "TARGET_AVX") (V8HF "TARGET_SSE2") > (V8SF "TARGET_AVX") V4SF > (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) > > @@ -422,7 +425,10 @@ > > ;; All 512bit vector float modes for bitwise operations > (define_mode_iterator VFB_512 > - [(V32HF "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")]) > + [(V32BF "TARGET_EVEX512") > + (V32HF "TARGET_EVEX512") > + (V16SF "TARGET_EVEX512") > + (V8DF "TARGET_EVEX512")]) > > (define_mode_iterator V4SF_V8HF > [V4SF V8HF]) > @@ -929,6 +935,8 @@ > (define_mode_attr sse > [(SF "sse") (DF "sse2") (HF "avx512fp16") > (V4SF "sse") (V2DF "sse2") > + (V32BF "avx512bf16") (V16BF "avx512bf16") > + (V8BF "avx512bf16") > (V32HF "avx512fp16") (V16HF "avx512fp16") > (V8HF "avx512fp16") > (V16SF "avx512f") (V8SF "avx") > @@ -1058,7 +1066,8 @@ > (define_mode_attr sseintvecmode2 > [(V8DF "XI") (V4DF "OI") 
(V2DF "TI") > (V8SF "OI") (V4SF "TI") > - (V16HF "OI") (V8HF "TI")]) > + (V16HF "OI") (V8HF "TI") > + (V16BF "OI") (V8BF "TI")]) > > (define_mode_attr sseintvecmodelower > [(V32HF "v32hi") (V32BF "v32hi") (V16SF "v16si") (V8DF "v8di") > @@ -4939,7 +4948,7 @@ > (match_operand:VFB_128_256 1 "register_operand" "0,x,v,v")) > (match_operand:VFB_128_256 2 "vector_operand" "xBm,xjm,vm,vm")))] > "TARGET_SSE && <mask_avx512vl_condition> > - && (!<mask_applied> || <ssescalarmode>mode != HFmode)" > + && (!<mask_applied> || <ssescalarsize> != 16)" > { > char buf[128]; > const char *ops; > @@ -4961,6 +4970,8 @@ > > switch (get_attr_mode (insn)) > { > + case MODE_V16BF: > + case MODE_V8BF: > case MODE_V16HF: > case MODE_V8HF: > case MODE_V8SF: > @@ -5011,7 +5022,7 @@ > (not:VFB_512 > (match_operand:VFB_512 1 "register_operand" "v")) > (match_operand:VFB_512 2 "nonimmediate_operand" "vm")))] > - "TARGET_AVX512F && (!<mask_applied> || <ssescalarmode>mode != HFmode)" > + "TARGET_AVX512F && (!<mask_applied> || <ssescalarsize> != 16)" > { > char buf[128]; > const char *ops; > @@ -5022,7 +5033,7 @@ > > /* Since there are no vandnp[sd] without AVX512DQ nor vandnph, > use vp<logic>[dq]. */ > - if (!TARGET_AVX512DQ || <MODE>mode == V32HFmode) > + if (!TARGET_AVX512DQ || <MODE>mode == V32HFmode || <MODE>mode == V32BFmode) > { > suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? 
"q" : "d"; > ops = "p"; > @@ -5047,7 +5058,7 @@ > (match_operand:VFB_128_256 1 "vector_operand") > (match_operand:VFB_128_256 2 "vector_operand")))] > "TARGET_SSE && <mask_avx512vl_condition> > - && (!<mask_applied> || <ssescalarmode>mode != HFmode)" > + && (!<mask_applied> || <ssescalarsize> != 16)" > "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") > > (define_expand "<code><mode>3<mask_name>" > @@ -5055,7 +5066,7 @@ > (any_logic:VFB_512 > (match_operand:VFB_512 1 "nonimmediate_operand") > (match_operand:VFB_512 2 "nonimmediate_operand")))] > - "TARGET_AVX512F && (!<mask_applied> || <ssescalarmode>mode != HFmode)" > + "TARGET_AVX512F && (!<mask_applied> || <ssescalarsize> != 16)" > "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") > > (define_insn "*<code><mode>3<mask_name>" > @@ -5064,7 +5075,7 @@ > (match_operand:VFB_128_256 1 "vector_operand" "%0,x,v,v") > (match_operand:VFB_128_256 2 "vector_operand" "xBm,xm,vm,vm")))] > "TARGET_SSE && <mask_avx512vl_condition> > - && (!<mask_applied> || <ssescalarmode>mode != HFmode) > + && (!<mask_applied> || <ssescalarsize> != 16) > && !(MEM_P (operands[1]) && MEM_P (operands[2]))" > { > char buf[128]; > @@ -5087,6 +5098,8 @@ > > switch (get_attr_mode (insn)) > { > + case MODE_V16BF: > + case MODE_V8BF: > case MODE_V16HF: > case MODE_V8HF: > case MODE_V8SF: > @@ -5132,7 +5145,7 @@ > (match_operand:VFB_512 1 "nonimmediate_operand" "%v") > (match_operand:VFB_512 2 "nonimmediate_operand" "vm")))] > "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2])) > - && (!<mask_applied> || <ssescalarmode>mode != HFmode)" > + && (!<mask_applied> || <ssescalarsize> != 16)" > { > char buf[128]; > const char *ops; > @@ -5143,7 +5156,7 @@ > > /* Since there are no v<logic>p[sd] without AVX512DQ nor v<logic>ph, > use vp<logic>[dq]. 
*/ > - if (!TARGET_AVX512DQ || <MODE>mode == V32HFmode) > + if (!TARGET_AVX512DQ || <MODE>mode == V32HFmode || <MODE>mode == V32BFmode) > { > suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; > ops = "p"; > diff --git a/gcc/testsuite/gcc.target/i386/avx2-bf16-vec-absneg.c b/gcc/testsuite/gcc.target/i386/avx2-bf16-vec-absneg.c > new file mode 100755 > index 00000000000..a3ee0b164f7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx2-bf16-vec-absneg.c > @@ -0,0 +1,85 @@ > +/* { dg-do run { target avx2 } } */ > +/* { dg-options "-O1 -mavx512bf16 -fdump-tree-vect-details -fdump-tree-optimized" } */ > + > +extern void abort (void); > +static void do_test (void); > + > +#define DO_TEST do_test > +#define AVX512BF16 > +#include "avx512-check.h" > + > +__bf16 b_128[8], r_abs_128[8], r_neg_128[8]; > +__bf16 b_256[16], r_abs_256[16], r_neg_256[16]; > + > +void > +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf)) > +abs_128 (void) > +{ > + for (int i = 0; i < 8; i++) > + r_abs_128[i] = __builtin_fabsf16(b_128[i]); > +} > + > +void > +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf)) > +neg_128 (void) > +{ > + for (int i = 0; i < 8; i++) > + r_neg_128[i] = -b_128[i]; > +} > + > +void > +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf)) > +abs_256 (void) > +{ > + for (int i = 0; i < 16; i++) > + r_abs_256[i] = __builtin_fabsf16(b_256[i]); > +} > + > +void > +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf)) > +neg_256 (void) > +{ > + for (int i = 0; i < 16; i++) > + r_neg_256[i] = -b_256[i]; > +} > + > +void > +check_absneg_results (__bf16 *b, __bf16 *r_abs, __bf16 *r_neg, int len) > +{ > + for (int i = 0; i < len; i++) > + { > + __bf16 expected_abs = __builtin_fabsf16(b[i]); > + __bf16 expected_neg = -b[i]; > + if (r_abs[i] != expected_abs || r_neg[i] != expected_neg) > + abort (); > + } > +} > + > +static void > +__attribute__ ((noinline, noclone)) > +do_test (void) > +{ > + /* Initialize 
test values */ > + float float_b[16] = {-1.2f, 3.4f, -5.6f, 7.8f, > + -9.0f, 1.0f, -2.0f, 3.0f, > + -4.0f, -5.0f, 6.0f, 7.0f, > + -8.0f, -9.0f, 10.0f, 11.0f}; > + > + for (int i = 0; i < 8; i++) > + b_128[i] = (__bf16)float_b[i]; > + > + for (int i = 0; i < 16; i++) > + b_256[i] = (__bf16)float_b[i]; > + > + abs_128 (); > + neg_128 (); > + check_absneg_results (b_128, r_abs_128, r_neg_128, 8); > + > + abs_256 (); > + neg_256 (); > + check_absneg_results (b_256, r_abs_256, r_neg_256, 16); > +} > + > +/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 2 "vect" } } */ > +/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 2 "vect" } } */ > +/* { dg-final { scan-tree-dump-times {(?n)ABS_EXPR <vect} 2 "optimized" { target { ! ia32 } } } } */ > \ No newline at end of file > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-bf16-vec-absneg.c b/gcc/testsuite/gcc.target/i386/avx512f-bf16-vec-absneg.c > new file mode 100755 > index 00000000000..01c7ad77204 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-bf16-vec-absneg.c > @@ -0,0 +1,66 @@ > +/* { dg-do run { target avx512f } } */ > +/* { dg-options "-O1 -mavx512bf16 -fdump-tree-vect-details -fdump-tree-optimized" } */ > + > +extern void abort (void); > +static void do_test (void); > + > +#define DO_TEST do_test > +#define AVX512BF16 > +#include "avx512-check.h" > + > +__bf16 b_512[32], r_abs_512[32], r_neg_512[32]; > + > +void > +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf, > +target("prefer-vector-width=512"))) > +abs_512 (void) > +{ > + for (int i = 0; i < 32; i++) > + r_abs_512[i] = __builtin_fabsf16(b_512[i]); > +} > + > +void > +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf, > +target("prefer-vector-width=512"))) > +neg_512 (void) > +{ > + for (int i = 0; i < 32; i++) > + r_neg_512[i] = -b_512[i]; > +} > + > +void > +check_absneg_results (__bf16 *b, __bf16 *r_abs, __bf16 *r_neg, int len) > +{ > + for (int i = 0; i 
< len; i++) > + { > + __bf16 expected_abs = __builtin_fabsf16(b[i]); > + __bf16 expected_neg = -b[i]; > + if (r_abs[i] != expected_abs || r_neg[i] != expected_neg) > + abort (); > + } > +} > + > +static void > +__attribute__ ((noinline, noclone)) > +do_test (void) > +{ > + /* Initialize test values */ > + float float_b[32] = {-1.2f, 3.4f, -5.6f, 7.8f, > + -9.0f, 1.0f, -2.0f, 3.0f, > + -4.0f, -5.0f, 6.0f, 7.0f, > + -8.0f, -9.0f, 10.0f, 11.0f, > + -1.2f, 3.4f, -5.6f, 7.8f, > + -9.0f, 1.0f, -2.0f, 3.0f, > + -4.0f, -5.0f, 6.0f, 7.0f, > + -8.0f, -9.0f, 10.0f, 11.0f}; > + > + for (int i = 0; i < 32; i++) > + b_512[i] = (__bf16)float_b[i]; > + > + abs_512 (); > + neg_512 (); > + check_absneg_results (b_512, r_abs_512, r_neg_512, 32); > +} > + > +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 2 "vect" } } */ > +/* { dg-final { scan-tree-dump-times {(?n)ABS_EXPR <vect} 1 "optimized" { target { ! ia32 } } } } */ > \ No newline at end of file > -- > 2.31.1 >
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 5c29ee1353f..46d13a55e6a 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -2175,20 +2175,28 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, machine_mode vmode = mode; rtvec par; - if (vector_mode || mode == TFmode || mode == HFmode) - { - use_sse = true; - if (mode == HFmode) - vmode = V8HFmode; - } - else if (TARGET_SSE_MATH) - { - use_sse = SSE_FLOAT_MODE_P (mode); - if (mode == SFmode) - vmode = V4SFmode; - else if (mode == DFmode) - vmode = V2DFmode; - } + switch (mode) + { + case HFmode: + use_sse = true; + vmode = V8HFmode; + break; + case BFmode: + use_sse = true; + vmode = V8BFmode; + break; + case SFmode: + use_sse = TARGET_SSE_MATH; + vmode = V4SFmode; + break; + case DFmode: + use_sse = TARGET_SSE_MATH; + vmode = V2DFmode; + break; + default: + use_sse = vector_mode || mode == TFmode; + break; + } dst = operands[0]; src = operands[1]; @@ -2321,16 +2329,26 @@ ix86_expand_copysign (rtx operands[]) mode = GET_MODE (operands[0]); - if (mode == HFmode) + switch (mode) + { + case HFmode: vmode = V8HFmode; - else if (mode == SFmode) + break; + case BFmode: + vmode = V8BFmode; + break; + case SFmode: vmode = V4SFmode; - else if (mode == DFmode) + break; + case DFmode: vmode = V2DFmode; - else if (mode == TFmode) + break; + case TFmode: vmode = mode; - else - gcc_unreachable (); + break; + default: + gcc_unreachable(); + } if (rtx_equal_p (operands[1], operands[2])) { @@ -2391,14 +2409,24 @@ ix86_expand_xorsign (rtx operands[]) mode = GET_MODE (dest); - if (mode == HFmode) + switch (mode) + { + case HFmode: vmode = V8HFmode; - else if (mode == SFmode) + break; + case BFmode: + vmode = V8BFmode; + break; + case SFmode: vmode = V4SFmode; - else if (mode == DFmode) + break; + case DFmode: vmode = V2DFmode; - else + break; + default: gcc_unreachable (); + break; + } temp = gen_reg_rtx (vmode); mask = ix86_build_signbit_mask 
(vmode, 0, 0); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index d4ccc24be6e..b5768a65e52 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -16353,6 +16353,9 @@ ix86_build_const_vector (machine_mode mode, bool vect, rtx value) case E_V8DFmode: case E_V4DFmode: case E_V2DFmode: + case E_V32BFmode: + case E_V16BFmode: + case E_V8BFmode: n_elt = GET_MODE_NUNITS (mode); v = rtvec_alloc (n_elt); scalar_mode = GET_MODE_INNER (mode); @@ -16389,6 +16392,9 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) case E_V8HFmode: case E_V16HFmode: case E_V32HFmode: + case E_V32BFmode: + case E_V16BFmode: + case E_V8BFmode: vec_mode = mode; imode = HImode; break; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0be2dcd8891..1703bbb4250 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -351,7 +351,9 @@ ;; 128-, 256- and 512-bit float vector modes for bitwise operations (define_mode_iterator VFB - [(V32HF "TARGET_AVX512F && TARGET_EVEX512") + [(V32BF "TARGET_AVX512F && TARGET_EVEX512") + (V16BF "TARGET_AVX") (V8BF "TARGET_SSE2") + (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") (V8HF "TARGET_SSE2") (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F && TARGET_EVEX512") @@ -364,7 +366,8 @@ ;; 128- and 256-bit float vector modes for bitwise operations (define_mode_iterator VFB_128_256 - [(V16HF "TARGET_AVX") (V8HF "TARGET_SSE2") + [(V16BF "TARGET_AVX") (V8BF "TARGET_SSE2") + (V16HF "TARGET_AVX") (V8HF "TARGET_SSE2") (V8SF "TARGET_AVX") V4SF (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) @@ -422,7 +425,10 @@ ;; All 512bit vector float modes for bitwise operations (define_mode_iterator VFB_512 - [(V32HF "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")]) + [(V32BF "TARGET_EVEX512") + (V32HF "TARGET_EVEX512") + (V16SF "TARGET_EVEX512") + (V8DF "TARGET_EVEX512")]) (define_mode_iterator V4SF_V8HF [V4SF V8HF]) @@ -929,6 +935,8 @@ 
(define_mode_attr sse [(SF "sse") (DF "sse2") (HF "avx512fp16") (V4SF "sse") (V2DF "sse2") + (V32BF "avx512bf16") (V16BF "avx512bf16") + (V8BF "avx512bf16") (V32HF "avx512fp16") (V16HF "avx512fp16") (V8HF "avx512fp16") (V16SF "avx512f") (V8SF "avx") @@ -1058,7 +1066,8 @@ (define_mode_attr sseintvecmode2 [(V8DF "XI") (V4DF "OI") (V2DF "TI") (V8SF "OI") (V4SF "TI") - (V16HF "OI") (V8HF "TI")]) + (V16HF "OI") (V8HF "TI") + (V16BF "OI") (V8BF "TI")]) (define_mode_attr sseintvecmodelower [(V32HF "v32hi") (V32BF "v32hi") (V16SF "v16si") (V8DF "v8di") @@ -4939,7 +4948,7 @@ (match_operand:VFB_128_256 1 "register_operand" "0,x,v,v")) (match_operand:VFB_128_256 2 "vector_operand" "xBm,xjm,vm,vm")))] "TARGET_SSE && <mask_avx512vl_condition> - && (!<mask_applied> || <ssescalarmode>mode != HFmode)" + && (!<mask_applied> || <ssescalarsize> != 16)" { char buf[128]; const char *ops; @@ -4961,6 +4970,8 @@ switch (get_attr_mode (insn)) { + case MODE_V16BF: + case MODE_V8BF: case MODE_V16HF: case MODE_V8HF: case MODE_V8SF: @@ -5011,7 +5022,7 @@ (not:VFB_512 (match_operand:VFB_512 1 "register_operand" "v")) (match_operand:VFB_512 2 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && (!<mask_applied> || <ssescalarmode>mode != HFmode)" + "TARGET_AVX512F && (!<mask_applied> || <ssescalarsize> != 16)" { char buf[128]; const char *ops; @@ -5022,7 +5033,7 @@ /* Since there are no vandnp[sd] without AVX512DQ nor vandnph, use vp<logic>[dq]. */ - if (!TARGET_AVX512DQ || <MODE>mode == V32HFmode) + if (!TARGET_AVX512DQ || <MODE>mode == V32HFmode || <MODE>mode == V32BFmode) { suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? 
"q" : "d"; ops = "p"; @@ -5047,7 +5058,7 @@ (match_operand:VFB_128_256 1 "vector_operand") (match_operand:VFB_128_256 2 "vector_operand")))] "TARGET_SSE && <mask_avx512vl_condition> - && (!<mask_applied> || <ssescalarmode>mode != HFmode)" + && (!<mask_applied> || <ssescalarsize> != 16)" "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_expand "<code><mode>3<mask_name>" @@ -5055,7 +5066,7 @@ (any_logic:VFB_512 (match_operand:VFB_512 1 "nonimmediate_operand") (match_operand:VFB_512 2 "nonimmediate_operand")))] - "TARGET_AVX512F && (!<mask_applied> || <ssescalarmode>mode != HFmode)" + "TARGET_AVX512F && (!<mask_applied> || <ssescalarsize> != 16)" "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*<code><mode>3<mask_name>" @@ -5064,7 +5075,7 @@ (match_operand:VFB_128_256 1 "vector_operand" "%0,x,v,v") (match_operand:VFB_128_256 2 "vector_operand" "xBm,xm,vm,vm")))] "TARGET_SSE && <mask_avx512vl_condition> - && (!<mask_applied> || <ssescalarmode>mode != HFmode) + && (!<mask_applied> || <ssescalarsize> != 16) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { char buf[128]; @@ -5087,6 +5098,8 @@ switch (get_attr_mode (insn)) { + case MODE_V16BF: + case MODE_V8BF: case MODE_V16HF: case MODE_V8HF: case MODE_V8SF: @@ -5132,7 +5145,7 @@ (match_operand:VFB_512 1 "nonimmediate_operand" "%v") (match_operand:VFB_512 2 "nonimmediate_operand" "vm")))] "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2])) - && (!<mask_applied> || <ssescalarmode>mode != HFmode)" + && (!<mask_applied> || <ssescalarsize> != 16)" { char buf[128]; const char *ops; @@ -5143,7 +5156,7 @@ /* Since there are no v<logic>p[sd] without AVX512DQ nor v<logic>ph, use vp<logic>[dq]. */ - if (!TARGET_AVX512DQ || <MODE>mode == V32HFmode) + if (!TARGET_AVX512DQ || <MODE>mode == V32HFmode || <MODE>mode == V32BFmode) { suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? 
"q" : "d"; ops = "p"; diff --git a/gcc/testsuite/gcc.target/i386/avx2-bf16-vec-absneg.c b/gcc/testsuite/gcc.target/i386/avx2-bf16-vec-absneg.c new file mode 100755 index 00000000000..a3ee0b164f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-bf16-vec-absneg.c @@ -0,0 +1,85 @@ +/* { dg-do run { target avx2 } } */ +/* { dg-options "-O1 -mavx512bf16 -fdump-tree-vect-details -fdump-tree-optimized" } */ + +extern void abort (void); +static void do_test (void); + +#define DO_TEST do_test +#define AVX512BF16 +#include "avx512-check.h" + +__bf16 b_128[8], r_abs_128[8], r_neg_128[8]; +__bf16 b_256[16], r_abs_256[16], r_neg_256[16]; + +void +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf)) +abs_128 (void) +{ + for (int i = 0; i < 8; i++) + r_abs_128[i] = __builtin_fabsf16(b_128[i]); +} + +void +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf)) +neg_128 (void) +{ + for (int i = 0; i < 8; i++) + r_neg_128[i] = -b_128[i]; +} + +void +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf)) +abs_256 (void) +{ + for (int i = 0; i < 16; i++) + r_abs_256[i] = __builtin_fabsf16(b_256[i]); +} + +void +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf)) +neg_256 (void) +{ + for (int i = 0; i < 16; i++) + r_neg_256[i] = -b_256[i]; +} + +void +check_absneg_results (__bf16 *b, __bf16 *r_abs, __bf16 *r_neg, int len) +{ + for (int i = 0; i < len; i++) + { + __bf16 expected_abs = __builtin_fabsf16(b[i]); + __bf16 expected_neg = -b[i]; + if (r_abs[i] != expected_abs || r_neg[i] != expected_neg) + abort (); + } +} + +static void +__attribute__ ((noinline, noclone)) +do_test (void) +{ + /* Initialize test values */ + float float_b[16] = {-1.2f, 3.4f, -5.6f, 7.8f, + -9.0f, 1.0f, -2.0f, 3.0f, + -4.0f, -5.0f, 6.0f, 7.0f, + -8.0f, -9.0f, 10.0f, 11.0f}; + + for (int i = 0; i < 8; i++) + b_128[i] = (__bf16)float_b[i]; + + for (int i = 0; i < 16; i++) + b_256[i] = (__bf16)float_b[i]; + + abs_128 (); + neg_128 (); + 
check_absneg_results (b_128, r_abs_128, r_neg_128, 8); + + abs_256 (); + neg_256 (); + check_absneg_results (b_256, r_abs_256, r_neg_256, 16); +} + +/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)ABS_EXPR <vect} 2 "optimized" { target { ! ia32 } } } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/i386/avx512f-bf16-vec-absneg.c b/gcc/testsuite/gcc.target/i386/avx512f-bf16-vec-absneg.c new file mode 100755 index 00000000000..01c7ad77204 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-bf16-vec-absneg.c @@ -0,0 +1,66 @@ +/* { dg-do run { target avx512f } } */ +/* { dg-options "-O1 -mavx512bf16 -fdump-tree-vect-details -fdump-tree-optimized" } */ + +extern void abort (void); +static void do_test (void); + +#define DO_TEST do_test +#define AVX512BF16 +#include "avx512-check.h" + +__bf16 b_512[32], r_abs_512[32], r_neg_512[32]; + +void +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf, +target("prefer-vector-width=512"))) +abs_512 (void) +{ + for (int i = 0; i < 32; i++) + r_abs_512[i] = __builtin_fabsf16(b_512[i]); +} + +void +__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf, +target("prefer-vector-width=512"))) +neg_512 (void) +{ + for (int i = 0; i < 32; i++) + r_neg_512[i] = -b_512[i]; +} + +void +check_absneg_results (__bf16 *b, __bf16 *r_abs, __bf16 *r_neg, int len) +{ + for (int i = 0; i < len; i++) + { + __bf16 expected_abs = __builtin_fabsf16(b[i]); + __bf16 expected_neg = -b[i]; + if (r_abs[i] != expected_abs || r_neg[i] != expected_neg) + abort (); + } +} + +static void +__attribute__ ((noinline, noclone)) +do_test (void) +{ + /* Initialize test values */ + float float_b[32] = {-1.2f, 3.4f, -5.6f, 7.8f, + -9.0f, 1.0f, -2.0f, 3.0f, + -4.0f, -5.0f, 6.0f, 7.0f, + -8.0f, -9.0f, 10.0f, 11.0f, + -1.2f, 3.4f, -5.6f, 7.8f, + 
-9.0f, 1.0f, -2.0f, 3.0f, + -4.0f, -5.0f, 6.0f, 7.0f, + -8.0f, -9.0f, 10.0f, 11.0f}; + + for (int i = 0; i < 32; i++) + b_512[i] = (__bf16)float_b[i]; + + abs_512 (); + neg_512 (); + check_absneg_results (b_512, r_abs_512, r_neg_512, 32); +} + +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times {(?n)ABS_EXPR <vect} 1 "optimized" { target { ! ia32 } } } } */ \ No newline at end of file