diff mbox series

[2/7] Lower AVX512 kmask comparison back to AVX2 comparison when op_{true, false} is vector -1/0.

Message ID 20240627082307.1166985-3-hongtao.liu@intel.com
State New
Headers show
Series Remove vcond{,u,eq}<mode> expanders. | expand

Commit Message

liuhongt June 27, 2024, 8:23 a.m. UTC
gcc/ChangeLog
	PR target/115517
	* config/i386/sse.md
	(*<avx512>_cvtmask2<ssemodesuffix><mode>_not): New pre_reload
	splitter.
	(*<avx512>_cvtmask2<ssemodesuffix><mode>_not): Ditto.
	(*avx2_pcmp<mode>3_6): Ditto.
	(*avx2_pcmp<mode>3_7): Ditto.
---
 gcc/config/i386/sse.md | 97 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)

Comments

Richard Biener June 27, 2024, 10:10 a.m. UTC | #1
On Thu, Jun 27, 2024 at 10:30 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> gcc/ChangeLog

In PR115659 Kewen notes that ISEL (and possibly folding) could do a
better job with these.  In addition to the issues mentioned there, we can
also check whether the target can handle an alternate mask mode.  So
instead of gating with

          /* Try to fold x CMP y ? -1 : 0 to x CMP y.  */
          if (can_compute_op0
              && integer_minus_onep (op1)
              && integer_zerop (op2)
              && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)))

when TYPE_MODE (TREE_TYPE (lhs)) != TYPE_MODE (TREE_TYPE (op0)), check
whether, if we do

  build_truth_vector_type_for_mode (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)),
                                    TYPE_MODE (TREE_TYPE (op0)));

and use that for the LHS type, we can handle the resulting compare
(can_compute_op0 with this mode), and if so rewrite it accordingly to make
the transform.

Richard.

>         PR target/115517
>         * config/i386/sse.md
>         (*<avx512>_cvtmask2<ssemodesuffix><mode>_not): New pre_reload
>         splitter.
>         (*<avx512>_cvtmask2<ssemodesuffix><mode>_not): Ditto.
>         (*avx2_pcmp<mode>3_6): Ditto.
>         (*avx2_pcmp<mode>3_7): Ditto.
> ---
>  gcc/config/i386/sse.md | 97 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 97 insertions(+)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 1148ac84f3d..822159a869b 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -9986,6 +9986,24 @@ (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
>    [(set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> +(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
> +  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
> +       (vec_merge:VI12_AVX512VL
> +         (match_operand:VI12_AVX512VL 2 "const0_operand")
> +         (match_operand:VI12_AVX512VL 3 "vector_all_ones_operand")
> +         (match_operand:<avx512fmaskmode> 1 "register_operand")))]
> +  "TARGET_AVX512BW && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 4)
> +       (not:<avx512fmaskmode> (match_dup 1)))
> +   (set (match_dup 0)
> +       (vec_merge:VI12_AVX512VL
> +         (match_dup 3)
> +         (match_dup 2)
> +         (match_dup 4)))]
> +  "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);")
> +
>  (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
>    [(set (match_operand:VI48_AVX512VL 0 "register_operand")
>         (vec_merge:VI48_AVX512VL
> @@ -10024,6 +10042,24 @@ (define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>"
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> +(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
> +  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
> +       (vec_merge:VI48_AVX512VL
> +         (match_operand:VI48_AVX512VL 2 "const0_operand")
> +         (match_operand:VI48_AVX512VL 3 "vector_all_ones_operand")
> +         (match_operand:<avx512fmaskmode> 1 "register_operand")))]
> +  "TARGET_AVX512F && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 4)
> +       (not:<avx512fmaskmode> (match_dup 1)))
> +   (set (match_dup 0)
> +       (vec_merge:VI48_AVX512VL
> +         (match_dup 3)
> +         (match_dup 2)
> +         (match_dup 4)))]
> +  "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);")
> +
>  (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>_pternlog_false_dep"
>    [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
>         (vec_merge:VI48_AVX512VL
> @@ -17675,6 +17711,67 @@ (define_insn_and_split "*avx2_pcmp<mode>3_5"
>      std::swap (operands[1], operands[2]);
>  })
>
> +(define_int_attr pcmp_usmin
> +  [(UNSPEC_PCMP "smin") (UNSPEC_UNSIGNED_PCMP "umin")])
> +
> +(define_insn_and_split "*avx2_pcmp<mode>3_6"
> + [(set (match_operand:VI_128_256  0 "register_operand")
> +       (vec_merge:VI_128_256
> +         (match_operand:VI_128_256 1 "vector_all_ones_operand")
> +         (match_operand:VI_128_256 2 "const0_operand")
> +         (unspec:<avx512fmaskmode>
> +           [(match_operand:VI_128_256 3 "nonimmediate_operand")
> +            (match_operand:VI_128_256 4 "nonimmediate_operand")
> +            (match_operand:SI 5 "const_0_to_7_operand")]
> +            UNSPEC_PCMP_ITER)))]
> +  "TARGET_AVX512VL && ix86_pre_reload_split ()
> +   && (INTVAL (operands[5]) == 2 || INTVAL (operands[5]) == 5)"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  rtx dst_min = gen_reg_rtx (<MODE>mode);
> +
> +  if (MEM_P (operands[3]) && MEM_P (operands[4]))
> +    operands[3] = force_reg (<MODE>mode, operands[3]);
> +  emit_insn (gen_<pcmp_usmin><mode>3 (dst_min, operands[3], operands[4]));
> +  rtx eq_op = INTVAL (operands[5]) == 2 ? operands[3] : operands[4];
> +  emit_move_insn (operands[0], gen_rtx_EQ (<MODE>mode, eq_op, dst_min));
> +  DONE;
> +})
> +
> +(define_insn_and_split "*avx2_pcmp<mode>3_7"
> + [(set (match_operand:VI_128_256  0 "register_operand")
> +       (vec_merge:VI_128_256
> +         (match_operand:VI_128_256 1 "const0_operand")
> +         (match_operand:VI_128_256 2 "vector_all_ones_operand")
> +         (unspec:<avx512fmaskmode>
> +           [(match_operand:VI_128_256 3 "nonimmediate_operand")
> +            (match_operand:VI_128_256 4 "nonimmediate_operand")
> +            (match_operand:SI 5 "const_0_to_7_operand")]
> +            UNSPEC_PCMP_ITER)))]
> +  "TARGET_AVX512VL && ix86_pre_reload_split ()
> +     /* NE is commutative.  */
> +   && (INTVAL (operands[5]) == 4
> +         /* LE, 3 must be register.  */
> +       || INTVAL (operands[5]) == 2
> +         /* NLT aka GE, 4 must be register and we swap operands.  */
> +       || INTVAL (operands[5]) == 5)"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  if (INTVAL (operands[5]) == 5)
> +    std::swap (operands[3], operands[4]);
> +
> +  if (MEM_P (operands[3]))
> +    operands[3] = force_reg (<MODE>mode, operands[3]);
> +  enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ;
> +  emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
> +                                              operands[3], operands[4]));
> +  DONE;
> +})
> +
>  (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
>    [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
>         (unspec:<avx512fmaskmode>
> --
> 2.31.1
>
diff mbox series

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1148ac84f3d..822159a869b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9986,6 +9986,24 @@  (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
   [(set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"  ; pre-reload splitter: mask -> vector with swapped 0 / -1 arms
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
+	(vec_merge:VI12_AVX512VL
+	  (match_operand:VI12_AVX512VL 2 "const0_operand")  ; first vec_merge arm: zero
+	  (match_operand:VI12_AVX512VL 3 "vector_all_ones_operand")  ; second vec_merge arm: all ones
+	  (match_operand:<avx512fmaskmode> 1 "register_operand")))]  ; mask register selecting between the arms
+  "TARGET_AVX512BW && ix86_pre_reload_split ()"
+  "#"  ; no assembler output of its own; the insn is always split
+  "&& 1"
+  [(set (match_dup 4)  ; operand 4 = bitwise NOT of the incoming mask
+	(not:<avx512fmaskmode> (match_dup 1)))
+   (set (match_dup 0)  ; same merge, arms swapped, driven by the inverted mask
+	(vec_merge:VI12_AVX512VL
+	  (match_dup 3)
+	  (match_dup 2)
+	  (match_dup 4)))]
+  "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);")  ; fresh pseudo that holds the inverted mask
+
 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
   [(set (match_operand:VI48_AVX512VL 0 "register_operand")
 	(vec_merge:VI48_AVX512VL
@@ -10024,6 +10042,24 @@  (define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"  ; pre-reload splitter: mask -> vector with swapped 0 / -1 arms (VI48 modes)
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
+	(vec_merge:VI48_AVX512VL
+	  (match_operand:VI48_AVX512VL 2 "const0_operand")  ; first vec_merge arm: zero
+	  (match_operand:VI48_AVX512VL 3 "vector_all_ones_operand")  ; second vec_merge arm: all ones
+	  (match_operand:<avx512fmaskmode> 1 "register_operand")))]  ; mask register selecting between the arms
+  "TARGET_AVX512F && ix86_pre_reload_split ()"
+  "#"  ; no assembler output of its own; the insn is always split
+  "&& 1"
+  [(set (match_dup 4)  ; operand 4 = bitwise NOT of the incoming mask
+	(not:<avx512fmaskmode> (match_dup 1)))
+   (set (match_dup 0)  ; same merge, arms swapped, driven by the inverted mask
+	(vec_merge:VI48_AVX512VL
+	  (match_dup 3)
+	  (match_dup 2)
+	  (match_dup 4)))]
+  "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);")  ; fresh pseudo that holds the inverted mask
+
 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>_pternlog_false_dep"
   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
 	(vec_merge:VI48_AVX512VL
@@ -17675,6 +17711,67 @@  (define_insn_and_split "*avx2_pcmp<mode>3_5"
     std::swap (operands[1], operands[2]);
 })
 
+(define_int_attr pcmp_usmin  ; per-unspec name fragment, spliced into gen_<pcmp_usmin><mode>3 by *avx2_pcmp<mode>3_6
+  [(UNSPEC_PCMP "smin") (UNSPEC_UNSIGNED_PCMP "umin")])  ; UNSPEC_PCMP -> smin, UNSPEC_UNSIGNED_PCMP -> umin
+
+(define_insn_and_split "*avx2_pcmp<mode>3_6"  ; mask compare selecting -1 / 0, rewritten as min followed by EQ
+ [(set (match_operand:VI_128_256  0 "register_operand")
+	(vec_merge:VI_128_256
+	  (match_operand:VI_128_256 1 "vector_all_ones_operand")
+	  (match_operand:VI_128_256 2 "const0_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:VI_128_256 3 "nonimmediate_operand")
+	     (match_operand:VI_128_256 4 "nonimmediate_operand")
+	     (match_operand:SI 5 "const_0_to_7_operand")]  ; comparison predicate immediate
+	     UNSPEC_PCMP_ITER)))]
+  "TARGET_AVX512VL && ix86_pre_reload_split ()
+   && (INTVAL (operands[5]) == 2 || INTVAL (operands[5]) == 5)"  ; only predicates 2 (LE) and 5 (NLT, i.e. GE)
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dst_min = gen_reg_rtx (<MODE>mode);  /* Receives min (operands[3], operands[4]).  */
+
+  if (MEM_P (operands[3]) && MEM_P (operands[4]))  /* Leave at most one of the two inputs in memory.  */
+    operands[3] = force_reg (<MODE>mode, operands[3]);
+  emit_insn (gen_<pcmp_usmin><mode>3 (dst_min, operands[3], operands[4]));  /* smin or umin, following the unspec.  */
+  rtx eq_op = INTVAL (operands[5]) == 2 ? operands[3] : operands[4];  /* a <= b iff min == a; a >= b iff min == b.  */
+  emit_move_insn (operands[0], gen_rtx_EQ (<MODE>mode, eq_op, dst_min));
+  DONE;
+})
+
+(define_insn_and_split "*avx2_pcmp<mode>3_7"  ; mask compare selecting 0 / -1, rewritten as the inverse vector compare
+ [(set (match_operand:VI_128_256  0 "register_operand")
+	(vec_merge:VI_128_256
+	  (match_operand:VI_128_256 1 "const0_operand")
+	  (match_operand:VI_128_256 2 "vector_all_ones_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:VI_128_256 3 "nonimmediate_operand")
+	     (match_operand:VI_128_256 4 "nonimmediate_operand")
+	     (match_operand:SI 5 "const_0_to_7_operand")]  ; comparison predicate immediate
+	     UNSPEC_PCMP)))]  ; signed compares only: the split emits signed GT, which is wrong for UNSPEC_UNSIGNED_PCMP
+  "TARGET_AVX512VL && ix86_pre_reload_split ()
+     /* NE is commutative.  */
+   && (INTVAL (operands[5]) == 4
+	  /* LE, 3 must be register.  */
+       || INTVAL (operands[5]) == 2
+	  /* NLT aka GE, 4 must be register and we swap operands.  */
+       || INTVAL (operands[5]) == 5)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (INTVAL (operands[5]) == 5)  /* !(a >= b) is b > a, so compare with the inputs exchanged.  */
+    std::swap (operands[3], operands[4]);
+
+  if (MEM_P (operands[3]))  /* The first input of the emitted compare is forced into a register.  */
+    operands[3] = force_reg (<MODE>mode, operands[3]);
+  enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ;  /* !(a != b) is EQ; !(a <= b) is signed GT.  */
+  emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
+					       operands[3], operands[4]));
+  DONE;
+})
+
 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
 	(unspec:<avx512fmaskmode>