Message ID | 20240902084202.1862005-1-admin@levyhsu.com |
---|---|
State | New |
Headers | show |
Series | i386: Support partial vectorized V2BF/V4BF smaxmin | expand |
On Mon, Sep 2, 2024 at 4:42 PM Levy Hsu <admin@levyhsu.com> wrote: > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? Ok. > > This patch supports smaxmin for partial vectorized V2BF/V4BF. > > gcc/ChangeLog: > > * config/i386/mmx.md (<code><mode>3): New define_expand for V2BF/V4BF smaxmin. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c: New test. > --- > gcc/config/i386/mmx.md | 19 ++++++++++ > .../avx10_2-partial-bf-vector-smaxmin-1.c | 36 +++++++++++++++++++ > 2 files changed, 55 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index 9116ddb5321..3f12a1349ab 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -2098,6 +2098,25 @@ > DONE; > }) > > +(define_expand "<code><mode>3" > + [(set (match_operand:VBF_32_64 0 "register_operand") > + (smaxmin:VBF_32_64 > + (match_operand:VBF_32_64 1 "nonimmediate_operand") > + (match_operand:VBF_32_64 2 "nonimmediate_operand")))] > + "TARGET_AVX10_2_256" > +{ > + rtx op0 = gen_reg_rtx (V8BFmode); > + rtx op1 = lowpart_subreg (V8BFmode, > + force_reg (<MODE>mode, operands[1]), <MODE>mode); > + rtx op2 = lowpart_subreg (V8BFmode, > + force_reg (<MODE>mode, operands[2]), <MODE>mode); > + > + emit_insn (gen_<code>v8bf3 (op0, op1, op2)); > + > + emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode)); > + DONE; > +}) > + > (define_expand "sqrt<mode>2" > [(set (match_operand:VHF_32_64 0 "register_operand") > (sqrt:VHF_32_64 > diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c > new file mode 100644 > index 00000000000..0a7cc58e29d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c > @@ -0,0 +1,36 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-mavx10.2 -Ofast" } */ > +/* /* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */ > +/* /* { dg-final { scan-assembler-times "vminpbf16" 2 } } */ > + > +void > +maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) > +{ > + int i; > + for (i = 0; i < 4; i++) > + dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; > +} > + > +void > +maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) > +{ > + int i; > + for (i = 0; i < 2; i++) > + dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; > +} > + > +void > +minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) > +{ > + int i; > + for (i = 0; i < 4; i++) > + dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; > +} > + > +void > +minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) > +{ > + int i; > + for (i = 0; i < 2; i++) > + dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; > +} > -- > 2.31.1 >
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9116ddb5321..3f12a1349ab 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2098,6 +2098,25 @@
   DONE;
 })
 
+(define_expand "<code><mode>3"
+  [(set (match_operand:VBF_32_64 0 "register_operand")
+	(smaxmin:VBF_32_64
+	  (match_operand:VBF_32_64 1 "nonimmediate_operand")
+	  (match_operand:VBF_32_64 2 "nonimmediate_operand")))]
+  "TARGET_AVX10_2_256"
+{
+  rtx op0 = gen_reg_rtx (V8BFmode);
+  rtx op1 = lowpart_subreg (V8BFmode,
+			    force_reg (<MODE>mode, operands[1]), <MODE>mode);
+  rtx op2 = lowpart_subreg (V8BFmode,
+			    force_reg (<MODE>mode, operands[2]), <MODE>mode);
+
+  emit_insn (gen_<code>v8bf3 (op0, op1, op2));
+
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+  DONE;
+})
+
 (define_expand "sqrt<mode>2"
   [(set (match_operand:VHF_32_64 0 "register_operand")
 	(sqrt:VHF_32_64
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
new file mode 100644
index 00000000000..0a7cc58e29d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx10.2 -Ofast" } */
+/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
+
+void
+maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}