Message ID | 20240912015453.53318-1-admin@levyhsu.com |
---|---|
State | New |
Headers | show |
Series | [v2] Enable V2BF/V4BF vec_cmp with AVX10.2 vcmppbf16 | expand |
```text
On Thu, Sep 12, 2024 at 9:55 AM Levy Hsu <admin@levyhsu.com> wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
Ok.
>
> gcc/ChangeLog:
>
>         * config/i386/i386.cc (ix86_get_mask_mode):
>         Enable BFmode for targetm.vectorize.get_mask_mode with AVX10.2.
>         * config/i386/mmx.md (vec_cmp<mode>qi):
>         Implement vec_cmpv2bfqi and vec_cmpv4bfqi.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/part-vect-vec_cmpbf.c: New test.
> ---
>  gcc/config/i386/i386.cc                       |  3 ++-
>  gcc/config/i386/mmx.md                        | 17 ++++++++++++
>  .../gcc.target/i386/part-vect-vec_cmpbf.c     | 26 +++++++++++++++++++
>  3 files changed, 45 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 45320124b91..7dbae1d72e3 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -24682,7 +24682,8 @@ ix86_get_mask_mode (machine_mode data_mode)
>        /* AVX512FP16 only supports vector comparison
>           to kmask for _Float16.  */
>        || (TARGET_AVX512VL && TARGET_AVX512FP16
> -          && GET_MODE_INNER (data_mode) == E_HFmode))
> +          && GET_MODE_INNER (data_mode) == E_HFmode)
> +      || (TARGET_AVX10_2_256 && GET_MODE_INNER (data_mode) == E_BFmode))
>      {
>        if (elem_size == 4
>            || elem_size == 8
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 4bc191b874b..95d9356694a 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -2290,6 +2290,23 @@
>    DONE;
>  })
>
> +;;This instruction does not generate floating point exceptions
> +(define_expand "vec_cmp<mode>qi"
> +  [(set (match_operand:QI 0 "register_operand")
> +        (match_operator:QI 1 ""
> +          [(match_operand:VBF_32_64 2 "register_operand")
> +           (match_operand:VBF_32_64 3 "nonimmediate_operand")]))]
> +  "TARGET_AVX10_2_256"
> +{
> +  rtx op2 = lowpart_subreg (V8BFmode,
> +                            force_reg (<MODE>mode, operands[2]), <MODE>mode);
> +  rtx op3 = lowpart_subreg (V8BFmode,
> +                            force_reg (<MODE>mode, operands[3]), <MODE>mode);
> +
> +  emit_insn (gen_vec_cmpv8bfqi (operands[0], operands[1], op2, op3));
> +  DONE;
> +})
> +
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>  ;;
>  ;; Parallel half-precision floating point rounding operations.
> diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c
> new file mode 100644
> index 00000000000..0bb720b6432
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx10.2" } */
> +/* { dg-final { scan-assembler-times "vcmppbf16" 10 } } */
> +
> +typedef __bf16 __attribute__((__vector_size__ (4))) v2bf;
> +typedef __bf16 __attribute__((__vector_size__ (8))) v4bf;
> +
> +
> +#define VCMPMN(type, op, name) \
> +type \
> +__attribute__ ((noinline, noclone)) \
> +vec_cmp_##type##type##name (type a, type b) \
> +{ \
> +  return a op b; \
> +}
> +
> +VCMPMN (v4bf, <, lt)
> +VCMPMN (v2bf, <, lt)
> +VCMPMN (v4bf, <=, le)
> +VCMPMN (v2bf, <=, le)
> +VCMPMN (v4bf, >, gt)
> +VCMPMN (v2bf, >, gt)
> +VCMPMN (v4bf, >=, ge)
> +VCMPMN (v2bf, >=, ge)
> +VCMPMN (v4bf, ==, eq)
> +VCMPMN (v2bf, ==, eq)
> --
> 2.31.1
>
```
```diff
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 45320124b91..7dbae1d72e3 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -24682,7 +24682,8 @@ ix86_get_mask_mode (machine_mode data_mode)
       /* AVX512FP16 only supports vector comparison
          to kmask for _Float16.  */
       || (TARGET_AVX512VL && TARGET_AVX512FP16
-          && GET_MODE_INNER (data_mode) == E_HFmode))
+          && GET_MODE_INNER (data_mode) == E_HFmode)
+      || (TARGET_AVX10_2_256 && GET_MODE_INNER (data_mode) == E_BFmode))
     {
       if (elem_size == 4
           || elem_size == 8
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 4bc191b874b..95d9356694a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2290,6 +2290,23 @@
   DONE;
 })
 
+;;This instruction does not generate floating point exceptions
+(define_expand "vec_cmp<mode>qi"
+  [(set (match_operand:QI 0 "register_operand")
+        (match_operator:QI 1 ""
+          [(match_operand:VBF_32_64 2 "register_operand")
+           (match_operand:VBF_32_64 3 "nonimmediate_operand")]))]
+  "TARGET_AVX10_2_256"
+{
+  rtx op2 = lowpart_subreg (V8BFmode,
+                            force_reg (<MODE>mode, operands[2]), <MODE>mode);
+  rtx op3 = lowpart_subreg (V8BFmode,
+                            force_reg (<MODE>mode, operands[3]), <MODE>mode);
+
+  emit_insn (gen_vec_cmpv8bfqi (operands[0], operands[1], op2, op3));
+  DONE;
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel half-precision floating point rounding operations.
diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c
new file mode 100644
index 00000000000..0bb720b6432
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vcmppbf16" 10 } } */
+
+typedef __bf16 __attribute__((__vector_size__ (4))) v2bf;
+typedef __bf16 __attribute__((__vector_size__ (8))) v4bf;
+
+
+#define VCMPMN(type, op, name) \
+type \
+__attribute__ ((noinline, noclone)) \
+vec_cmp_##type##type##name (type a, type b) \
+{ \
+  return a op b; \
+}
+
+VCMPMN (v4bf, <, lt)
+VCMPMN (v2bf, <, lt)
+VCMPMN (v4bf, <=, le)
+VCMPMN (v2bf, <=, le)
+VCMPMN (v4bf, >, gt)
+VCMPMN (v2bf, >, gt)
+VCMPMN (v4bf, >=, ge)
+VCMPMN (v2bf, >=, ge)
+VCMPMN (v4bf, ==, eq)
+VCMPMN (v2bf, ==, eq)
```