Message ID | 20211008093121.88264-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Refine movhfcc. | expand |
On Fri, Oct 8, 2021 at 5:31 PM liuhongt <hongtao.liu@intel.com> wrote: > > For AVX512-FP16, HFmode only supports vcmpsh whose dest is mask > register, so for movhfcc, it's > > vcmpsh op2, op1, %k1 > vmovsh op1, op2{%k1} > mov op2, dest > > gcc/ChangeLog: > > PR target/102639 > * config/i386/i386-expand.c (ix86_valid_mask_cmp_mode): Handle > HFmode. > (ix86_use_mask_cmp_p): Ditto. > (ix86_expand_sse_movcc): Ditto. > * config/i386/i386.md (setcc_hf_mask): New define_insn. > (movhf_mask): Ditto. > (UNSPEC_MOVCC_MASK): New unspec. > * config/i386/sse.md (UNSPEC_PCMP): Move to i386.md. > > gcc/testsuite/ChangeLog: > * g++.target/i386/pr102639.C: New test. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Also no new failures for AVX512FP16 runtime tests under sde{-m32,}. Committed to trunk. > --- > gcc/config/i386/i386-expand.c | 19 ++++++++++--- > gcc/config/i386/i386.md | 34 +++++++++++++++++++++++- > gcc/config/i386/sse.md | 1 - > gcc/testsuite/g++.target/i386/pr102639.C | 19 +++++++++++++ > 4 files changed, 67 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/g++.target/i386/pr102639.C > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index 4780b993917..3c4a07d4d7d 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -3613,6 +3613,10 @@ ix86_valid_mask_cmp_mode (machine_mode mode) > if (TARGET_XOP && !TARGET_AVX512F) > return false; > > + /* HFmode only supports vcmpsh whose dest is mask register. */ > + if (TARGET_AVX512FP16 && mode == HFmode) > + return true; > + > /* AVX512F is needed for mask operation. 
*/ > if (!(TARGET_AVX512F && VECTOR_MODE_P (mode))) > return false; > @@ -3634,7 +3638,9 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode, > { > int vector_size = GET_MODE_SIZE (mode); > > - if (vector_size < 16) > + if (cmp_mode == HFmode) > + return true; > + else if (vector_size < 16) > return false; > else if (vector_size == 64) > return true; > @@ -3750,7 +3756,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) > && GET_MODE_CLASS (cmpmode) == MODE_INT) > { > gcc_assert (ix86_valid_mask_cmp_mode (mode)); > - /* Using vector move with mask register. */ > + /* Using scalar/vector move with mask register. */ > cmp = force_reg (cmpmode, cmp); > /* Optimize for mask zero. */ > op_true = (op_true != CONST0_RTX (mode) > @@ -3769,8 +3775,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) > std::swap (op_true, op_false); > } > > - rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp); > - emit_insn (gen_rtx_SET (dest, vec_merge)); > + if (mode == HFmode) > + emit_insn (gen_movhf_mask (dest, op_true, op_false, cmp)); > + else > + { > + rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp); > + emit_insn (gen_rtx_SET (dest, vec_merge)); > + } > return; > } > else if (vector_all_ones_operand (op_true, mode) > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 04cb3bf6a33..c7ae4ac5fbc 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -117,6 +117,7 @@ (define_c_enum "unspec" [ > ;; For SSE/MMX support: > UNSPEC_FIX_NOTRUNC > UNSPEC_MASKMOV > + UNSPEC_MOVCC_MASK > UNSPEC_MOVMSK > UNSPEC_BLENDV > UNSPEC_PSHUFB > @@ -125,8 +126,9 @@ (define_c_enum "unspec" [ > UNSPEC_RSQRT > UNSPEC_PSADBW > > - ;; For AVX512F support > + ;; For AVX/AVX512F support > UNSPEC_SCALEF > + UNSPEC_PCMP > > ;; Generic math support > UNSPEC_IEEE_MIN ; not commutative > @@ -13608,6 +13610,20 @@ (define_insn "setcc_<mode>_sse" > (set_attr "length_immediate" "1") > (set_attr 
"prefix" "orig,vex") > (set_attr "mode" "<MODE>")]) > + > +(define_insn "setcc_hf_mask" > + [(set (match_operand:QI 0 "register_operand" "=k") > + (unspec:QI > + [(match_operand:HF 1 "register_operand" "v") > + (match_operand:HF 2 "nonimmediate_operand" "vm") > + (match_operand:SI 3 "const_0_to_31_operand" "n")] > + UNSPEC_PCMP))] > + "TARGET_AVX512FP16" > + "vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}" > + [(set_attr "type" "ssecmp") > + (set_attr "prefix" "evex") > + (set_attr "mode" "HF")]) > + > > ;; Basic conditional jump instructions. > > @@ -19841,6 +19857,22 @@ (define_peephole2 > operands[9] = replace_rtx (operands[6], operands[0], operands[1], true); > }) > > +(define_insn "movhf_mask" > + [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v") > + (unspec:HF > + [(match_operand:HF 1 "nonimmediate_operand" "m,v,v") > + (match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C") > + (match_operand:QI 3 "register_operand" "Yk,Yk,Yk")] > + UNSPEC_MOVCC_MASK))] > + "TARGET_AVX512FP16" > + "@ > + vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1} > + vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1} > + vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}" > + [(set_attr "type" "ssemov") > + (set_attr "prefix" "evex") > + (set_attr "mode" "HF")]) > + > (define_expand "movhfcc" > [(set (match_operand:HF 0 "register_operand") > (if_then_else:HF > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 4559b0ce9c9..a3c4a3f1e62 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -67,7 +67,6 @@ (define_c_enum "unspec" [ > UNSPEC_PCLMUL > > ;; For AVX support > - UNSPEC_PCMP > UNSPEC_VPERMIL > UNSPEC_VPERMIL2 > UNSPEC_VPERMIL2F128 > diff --git a/gcc/testsuite/g++.target/i386/pr102639.C b/gcc/testsuite/g++.target/i386/pr102639.C > new file mode 100644 > index 00000000000..f094e4d1b43 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr102639.C > @@ -0,0 +1,19 @@ > +/* PR target/102639 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -std=c++14 
-mavx512fp16" } */ > +/* { dg-final { scan-assembler-times "vminsh" 1 } } */ > + > +typedef _Float16 v16hf __attribute__((vector_size(2))); > +v16hf vcond_v16hfv16hfge_b, vcond_v16hfv16hfge_c, vcond_v16hfv16hfge_d, > + __attribute__vcond_v16hfv16hfge_a; > +v16hf __attribute__vcond_v16hfv16hfge() { > + return __attribute__vcond_v16hfv16hfge_a >= vcond_v16hfv16hfge_b > + ? vcond_v16hfv16hfge_c > + : vcond_v16hfv16hfge_d; > +} > + > +v16hf __attribute__vcond_v16hfv16hfmax() { > + return __attribute__vcond_v16hfv16hfge_a < vcond_v16hfv16hfge_b > + ? __attribute__vcond_v16hfv16hfge_a > + : vcond_v16hfv16hfge_b; > +} > -- > 2.18.1 >
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 4780b993917..3c4a07d4d7d 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -3613,6 +3613,10 @@ ix86_valid_mask_cmp_mode (machine_mode mode) if (TARGET_XOP && !TARGET_AVX512F) return false; + /* HFmode only supports vcmpsh whose dest is mask register. */ + if (TARGET_AVX512FP16 && mode == HFmode) + return true; + /* AVX512F is needed for mask operation. */ if (!(TARGET_AVX512F && VECTOR_MODE_P (mode))) return false; @@ -3634,7 +3638,9 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode, { int vector_size = GET_MODE_SIZE (mode); - if (vector_size < 16) + if (cmp_mode == HFmode) + return true; + else if (vector_size < 16) return false; else if (vector_size == 64) return true; @@ -3750,7 +3756,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) && GET_MODE_CLASS (cmpmode) == MODE_INT) { gcc_assert (ix86_valid_mask_cmp_mode (mode)); - /* Using vector move with mask register. */ + /* Using scalar/vector move with mask register. */ cmp = force_reg (cmpmode, cmp); /* Optimize for mask zero. 
*/ op_true = (op_true != CONST0_RTX (mode) @@ -3769,8 +3775,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) std::swap (op_true, op_false); } - rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp); - emit_insn (gen_rtx_SET (dest, vec_merge)); + if (mode == HFmode) + emit_insn (gen_movhf_mask (dest, op_true, op_false, cmp)); + else + { + rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp); + emit_insn (gen_rtx_SET (dest, vec_merge)); + } return; } else if (vector_all_ones_operand (op_true, mode) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 04cb3bf6a33..c7ae4ac5fbc 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -117,6 +117,7 @@ (define_c_enum "unspec" [ ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC UNSPEC_MASKMOV + UNSPEC_MOVCC_MASK UNSPEC_MOVMSK UNSPEC_BLENDV UNSPEC_PSHUFB @@ -125,8 +126,9 @@ (define_c_enum "unspec" [ UNSPEC_RSQRT UNSPEC_PSADBW - ;; For AVX512F support + ;; For AVX/AVX512F support UNSPEC_SCALEF + UNSPEC_PCMP ;; Generic math support UNSPEC_IEEE_MIN ; not commutative @@ -13608,6 +13610,20 @@ (define_insn "setcc_<mode>_sse" (set_attr "length_immediate" "1") (set_attr "prefix" "orig,vex") (set_attr "mode" "<MODE>")]) + +(define_insn "setcc_hf_mask" + [(set (match_operand:QI 0 "register_operand" "=k") + (unspec:QI + [(match_operand:HF 1 "register_operand" "v") + (match_operand:HF 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_31_operand" "n")] + UNSPEC_PCMP))] + "TARGET_AVX512FP16" + "vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "prefix" "evex") + (set_attr "mode" "HF")]) + ;; Basic conditional jump instructions. 
@@ -19841,6 +19857,22 @@ (define_peephole2 operands[9] = replace_rtx (operands[6], operands[0], operands[1], true); }) +(define_insn "movhf_mask" + [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v") + (unspec:HF + [(match_operand:HF 1 "nonimmediate_operand" "m,v,v") + (match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C") + (match_operand:QI 3 "register_operand" "Yk,Yk,Yk")] + UNSPEC_MOVCC_MASK))] + "TARGET_AVX512FP16" + "@ + vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1} + vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1} + vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "mode" "HF")]) + (define_expand "movhfcc" [(set (match_operand:HF 0 "register_operand") (if_then_else:HF diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4559b0ce9c9..a3c4a3f1e62 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -67,7 +67,6 @@ (define_c_enum "unspec" [ UNSPEC_PCLMUL ;; For AVX support - UNSPEC_PCMP UNSPEC_VPERMIL UNSPEC_VPERMIL2 UNSPEC_VPERMIL2F128 diff --git a/gcc/testsuite/g++.target/i386/pr102639.C b/gcc/testsuite/g++.target/i386/pr102639.C new file mode 100644 index 00000000000..f094e4d1b43 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr102639.C @@ -0,0 +1,19 @@ +/* PR target/102639 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -std=c++14 -mavx512fp16" } */ +/* { dg-final { scan-assembler-times "vminsh" 1 } } */ + +typedef _Float16 v16hf __attribute__((vector_size(2))); +v16hf vcond_v16hfv16hfge_b, vcond_v16hfv16hfge_c, vcond_v16hfv16hfge_d, + __attribute__vcond_v16hfv16hfge_a; +v16hf __attribute__vcond_v16hfv16hfge() { + return __attribute__vcond_v16hfv16hfge_a >= vcond_v16hfv16hfge_b + ? vcond_v16hfv16hfge_c + : vcond_v16hfv16hfge_d; +} + +v16hf __attribute__vcond_v16hfv16hfmax() { + return __attribute__vcond_v16hfv16hfge_a < vcond_v16hfv16hfge_b + ? __attribute__vcond_v16hfv16hfge_a + : vcond_v16hfv16hfge_b; +}