Message ID | 20210701061648.9447-54-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Support all AVX512FP16 intrinsics. | expand |
On Thu, Jul 1, 2021 at 2:18 PM liuhongt <hongtao.liu@intel.com> wrote: > > gcc/ChangeLog: > > * config/i386/i386-features.c (remove_partial_avx_dependency): Handle > E_HFmode. > * config/i386/i386.md (sqrthf2): New expander. > (*sqrt<mode>2_sse): Extend to MODEFH. > * config/i386/sse.md > (*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>): > Extend to VFH_128. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512fp16-builtin-sqrt-1.c: New test. > * gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c: New test. > --- > gcc/config/i386/i386-features.c | 15 +++++++++++---- > gcc/config/i386/i386.md | 12 +++++++++--- > gcc/config/i386/sse.md | 8 ++++---- > .../i386/avx512fp16-builtin-sqrt-1.c | 18 ++++++++++++++++++ > .../i386/avx512fp16vl-builtin-sqrt-1.c | 19 +++++++++++++++++++ > 5 files changed, 61 insertions(+), 11 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c > > diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c > index a25769ae478..0b5a1a3af53 100644 > --- a/gcc/config/i386/i386-features.c > +++ b/gcc/config/i386/i386-features.c > @@ -2238,15 +2238,22 @@ remove_partial_avx_dependency (void) > > rtx zero; > machine_mode dest_vecmode; > - if (dest_mode == E_SFmode) > + switch (dest_mode) > { > + case E_HFmode: > + dest_vecmode = V8HFmode; > + zero = gen_rtx_SUBREG (V8HFmode, v4sf_const0, 0); > + break; > + case E_SFmode: > dest_vecmode = V4SFmode; > zero = v4sf_const0; > - } > - else > - { > + break; > + case E_DFmode: > dest_vecmode = V2DFmode; > zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0); > + break; > + default: > + gcc_unreachable (); > } > > /* Change source to vector mode. 
*/ > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index a85c23d74f1..81c893c60de 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -16561,9 +16561,9 @@ (define_expand "rsqrtsf2" > }) > > (define_insn "*sqrt<mode>2_sse" > - [(set (match_operand:MODEF 0 "register_operand" "=v,v,v") > - (sqrt:MODEF > - (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))] > + [(set (match_operand:MODEFH 0 "register_operand" "=v,v,v") > + (sqrt:MODEFH > + (match_operand:MODEFH 1 "nonimmediate_operand" "0,v,m")))] > "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" > "@ > %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1} > @@ -16583,6 +16583,12 @@ (define_insn "*sqrt<mode>2_sse" > ] > (symbol_ref "true")))]) > As mentioned by Uros, I think it would be better to have a separate pattern for HF here as well. > +(define_expand "sqrthf2" > + [(set (match_operand:HF 0 "register_operand") > + (sqrt:HF > + (match_operand:HF 1 "nonimmediate_operand")))] > + "TARGET_AVX512FP16") > + > (define_expand "sqrt<mode>2" > [(set (match_operand:MODEF 0 "register_operand") > (sqrt:MODEF > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 2c3dba5bdb0..b47e7f0b82a 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -2389,12 +2389,12 @@ (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>" > (set_attr "mode" "<ssescalarmode>")]) > > (define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>" > - [(set (match_operand:VF_128 0 "register_operand" "=x,v") > - (vec_merge:VF_128 > - (vec_duplicate:VF_128 > + [(set (match_operand:VFH_128 0 "register_operand" "=x,v") > + (vec_merge:VFH_128 > + (vec_duplicate:VFH_128 > (sqrt:<ssescalarmode> > (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))) > - (match_operand:VF_128 2 "register_operand" "0,v") > + (match_operand:VFH_128 2 "register_operand" "0,v") > (const_int 1)))] > "TARGET_SSE" > "@ > diff --git 
a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c > new file mode 100644 > index 00000000000..38cdf23fef7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -mavx512fp16" } */ > + > +_Float16 > +f1 (_Float16 x) > +{ > + return __builtin_sqrtf16 (x); > +} > + > +void > +f2 (_Float16* __restrict psrc, _Float16* __restrict pdst) > +{ > + for (int i = 0; i != 32; i++) > + pdst[i] = __builtin_sqrtf16 (psrc[i]); > +} > + > +/* { dg-final { scan-assembler-times "vsqrtsh\[^\n\r\]*xmm\[0-9\]" 1 } } */ > +/* { dg-final { scan-assembler-times "vsqrtph\[^\n\r\]*zmm\[0-9\]" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c > new file mode 100644 > index 00000000000..08deb3ea470 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */ > + > +void > +f1 (_Float16* __restrict psrc, _Float16* __restrict pdst) > +{ > + for (int i = 0; i != 8; i++) > + pdst[i] = __builtin_sqrtf16 (psrc[i]); > +} > + > +void > +f2 (_Float16* __restrict psrc, _Float16* __restrict pdst) > +{ > + for (int i = 0; i != 16; i++) > + pdst[i] = __builtin_sqrtf16 (psrc[i]); > +} > + > +/* { dg-final { scan-assembler-times "vsqrtph\[^\n\r\]*xmm\[0-9\]" 1 } } */ > +/* { dg-final { scan-assembler-times "vsqrtph\[^\n\r\]*ymm\[0-9\]" 1 } } */ > -- > 2.18.1 >
diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index a25769ae478..0b5a1a3af53 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -2238,15 +2238,22 @@ remove_partial_avx_dependency (void) rtx zero; machine_mode dest_vecmode; - if (dest_mode == E_SFmode) + switch (dest_mode) { + case E_HFmode: + dest_vecmode = V8HFmode; + zero = gen_rtx_SUBREG (V8HFmode, v4sf_const0, 0); + break; + case E_SFmode: dest_vecmode = V4SFmode; zero = v4sf_const0; - } - else - { + break; + case E_DFmode: dest_vecmode = V2DFmode; zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0); + break; + default: + gcc_unreachable (); } /* Change source to vector mode. */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a85c23d74f1..81c893c60de 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16561,9 +16561,9 @@ (define_expand "rsqrtsf2" }) (define_insn "*sqrt<mode>2_sse" - [(set (match_operand:MODEF 0 "register_operand" "=v,v,v") - (sqrt:MODEF - (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))] + [(set (match_operand:MODEFH 0 "register_operand" "=v,v,v") + (sqrt:MODEFH + (match_operand:MODEFH 1 "nonimmediate_operand" "0,v,m")))] "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" "@ %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1} @@ -16583,6 +16583,12 @@ (define_insn "*sqrt<mode>2_sse" ] (symbol_ref "true")))]) +(define_expand "sqrthf2" + [(set (match_operand:HF 0 "register_operand") + (sqrt:HF + (match_operand:HF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + (define_expand "sqrt<mode>2" [(set (match_operand:MODEF 0 "register_operand") (sqrt:MODEF diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 2c3dba5bdb0..b47e7f0b82a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2389,12 +2389,12 @@ (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>" (set_attr "mode" "<ssescalarmode>")]) (define_insn 
"*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>" - [(set (match_operand:VF_128 0 "register_operand" "=x,v") - (vec_merge:VF_128 - (vec_duplicate:VF_128 + [(set (match_operand:VFH_128 0 "register_operand" "=x,v") + (vec_merge:VFH_128 + (vec_duplicate:VFH_128 (sqrt:<ssescalarmode> (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))) - (match_operand:VF_128 2 "register_operand" "0,v") + (match_operand:VFH_128 2 "register_operand" "0,v") (const_int 1)))] "TARGET_SSE" "@ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c new file mode 100644 index 00000000000..38cdf23fef7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-sqrt-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx512fp16" } */ + +_Float16 +f1 (_Float16 x) +{ + return __builtin_sqrtf16 (x); +} + +void +f2 (_Float16* __restrict psrc, _Float16* __restrict pdst) +{ + for (int i = 0; i != 32; i++) + pdst[i] = __builtin_sqrtf16 (psrc[i]); +} + +/* { dg-final { scan-assembler-times "vsqrtsh\[^\n\r\]*xmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtph\[^\n\r\]*zmm\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c new file mode 100644 index 00000000000..08deb3ea470 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-builtin-sqrt-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */ + +void +f1 (_Float16* __restrict psrc, _Float16* __restrict pdst) +{ + for (int i = 0; i != 8; i++) + pdst[i] = __builtin_sqrtf16 (psrc[i]); +} + +void +f2 (_Float16* __restrict psrc, _Float16* __restrict pdst) +{ + for (int i = 0; i != 16; i++) + pdst[i] = __builtin_sqrtf16 (psrc[i]); +} + +/* { dg-final { scan-assembler-times "vsqrtph\[^\n\r\]*xmm\[0-9\]" 1 } } */ +/* { dg-final { 
scan-assembler-times "vsqrtph\[^\n\r\]*ymm\[0-9\]" 1 } } */