diff mbox series

i386: Support partial vectorized V2BF/V4BF smaxmin

Message ID 20240902084202.1862005-1-admin@levyhsu.com
State New
Headers show
Series i386: Support partial vectorized V2BF/V4BF smaxmin | expand

Commit Message

Levy Hsu Sept. 2, 2024, 8:41 a.m. UTC
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

This patch adds smaxmin support for partially vectorized V2BF/V4BF.

gcc/ChangeLog:

	* config/i386/mmx.md (<code><mode>3): New define_expand for V2BF/V4BF smaxmin.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c: New test.
---
 gcc/config/i386/mmx.md                        | 19 ++++++++++
 .../avx10_2-partial-bf-vector-smaxmin-1.c     | 36 +++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c

Comments

Hongtao Liu Sept. 3, 2024, 2:01 a.m. UTC | #1
On Mon, Sep 2, 2024 at 4:42 PM Levy Hsu <admin@levyhsu.com> wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
Ok.
>
> This patch supports sminmax for partial vectorized V2BF/V4BF.
>
> gcc/ChangeLog:
>
>         * config/i386/mmx.md (<code><mode>3): New define_expand for V2BF/V4BFsmaxmin
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c: New test.
> ---
>  gcc/config/i386/mmx.md                        | 19 ++++++++++
>  .../avx10_2-partial-bf-vector-smaxmin-1.c     | 36 +++++++++++++++++++
>  2 files changed, 55 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 9116ddb5321..3f12a1349ab 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -2098,6 +2098,25 @@
>    DONE;
>  })
>
> +(define_expand "<code><mode>3"
> +  [(set (match_operand:VBF_32_64 0 "register_operand")
> +    (smaxmin:VBF_32_64
> +      (match_operand:VBF_32_64 1 "nonimmediate_operand")
> +      (match_operand:VBF_32_64 2 "nonimmediate_operand")))]
> +  "TARGET_AVX10_2_256"
> +{
> +  rtx op0 = gen_reg_rtx (V8BFmode);
> +  rtx op1 = lowpart_subreg (V8BFmode,
> +                           force_reg (<MODE>mode, operands[1]), <MODE>mode);
> +  rtx op2 = lowpart_subreg (V8BFmode,
> +                           force_reg (<MODE>mode, operands[2]), <MODE>mode);
> +
> +  emit_insn (gen_<code>v8bf3 (op0, op1, op2));
> +
> +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
> +  DONE;
> +})
> +
>  (define_expand "sqrt<mode>2"
>    [(set (match_operand:VHF_32_64 0 "register_operand")
>         (sqrt:VHF_32_64
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
> new file mode 100644
> index 00000000000..0a7cc58e29d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mavx10.2 -Ofast" } */
> +/* /* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
> +/* /* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
> +
> +void
> +maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
> +{
> +  int i;
> +  for (i = 0; i < 4; i++)
> +    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
> +}
> +
> +void
> +maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
> +{
> +  int i;
> +  for (i = 0; i < 2; i++)
> +    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
> +}
> +
> +void
> +minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
> +{
> +  int i;
> +  for (i = 0; i < 4; i++)
> +    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
> +}
> +
> +void
> +minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
> +{
> +  int i;
> +  for (i = 0; i < 2; i++)
> +    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
> +}
> --
> 2.31.1
>
diff mbox series

Patch

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9116ddb5321..3f12a1349ab 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2098,6 +2098,25 @@ 
   DONE;
 })
 
+(define_expand "<code><mode>3"
+  [(set (match_operand:VBF_32_64 0 "register_operand")
+    (smaxmin:VBF_32_64
+      (match_operand:VBF_32_64 1 "nonimmediate_operand")
+      (match_operand:VBF_32_64 2 "nonimmediate_operand")))]
+  "TARGET_AVX10_2_256"
+{
+  rtx op0 = gen_reg_rtx (V8BFmode);
+  rtx op1 = lowpart_subreg (V8BFmode,
+			    force_reg (<MODE>mode, operands[1]), <MODE>mode);
+  rtx op2 = lowpart_subreg (V8BFmode,
+			    force_reg (<MODE>mode, operands[2]), <MODE>mode);
+
+  emit_insn (gen_<code>v8bf3 (op0, op1, op2));
+
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+  DONE;
+})
+
 (define_expand "sqrt<mode>2"
   [(set (match_operand:VHF_32_64 0 "register_operand")
 	(sqrt:VHF_32_64
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
new file mode 100644
index 00000000000..0a7cc58e29d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
@@ -0,0 +1,36 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx10.2 -Ofast" } */
+/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
+
+void
+maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}