diff mbox series

[3/4,rtl] : simplify boolean vector EQ and NE comparisons

Message ID ZtdGkjP/lMTJwbfX@arm.com
State New
Headers show
Series [1/4] middle-end: have vect_recog_cond_store_pattern use pattern statement for cond if available | expand

Commit Message

Tamar Christina Sept. 3, 2024, 5:25 p.m. UTC
Hi All,

This adds vector constant simplification for EQ and NE.  This is useful since
the vectorizer generates a lot more vector compares now, in particular NE and EQ
and so these help us optimize cases where the values were not known at GIMPLE
but instead only at RTL.

Bootstrapped Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf,
x86_64-pc-linux-gnu -m32, -m64 and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* simplify-rtx.cc (simplify_context::simplify_unary_operation): Try
	simplifying operand.
	(simplify_const_relational_operation): Simplify vector EQ and NE.
	(test_vector_int_const_compare): New.
	(test_vector_int_const_compare_ops): New.
	(simplify_rtx_cc_tests): Use them.

---




--

Comments

Richard Sandiford Sept. 6, 2024, 1:20 p.m. UTC | #1
Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> This adds vector constant simplification for EQ and NE.  This is useful since
> the vectorizer generates a lot more vector compares now, in particular NE and EQ
> and so these help us optimize cases where the values were not known at GIMPLE
> but instead only at RTL.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf,
> x86_64-pc-linux-gnu -m32, -m64 and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* simplify-rtx.cc (simplify_context::simplify_unary_operation): Try
> 	simplifying operand.
> 	(simplify_const_relational_operation): Simplify vector EQ and NE.
> 	(test_vector_int_const_compare): New.
> 	(test_vector_int_const_compare_ops): New.
> 	(simplify_rtx_cc_tests): Use them.
>
> ---
>
> diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
> index a20a61c5dddbc80b23a9489d925a2c31b2163458..7e83e80246b70c81c388e77967f645d171efe983 100644
> --- a/gcc/simplify-rtx.cc
> +++ b/gcc/simplify-rtx.cc
> @@ -886,6 +886,10 @@ simplify_context::simplify_unary_operation (rtx_code code, machine_mode mode,
>  
>    trueop = avoid_constant_pool_reference (op);
>  
> +  /* If the operand is not a reg or constant try simplifying it first.  */
> +  if (rtx tmp_op = simplify_rtx (op))
> +    op = tmp_op;
> +

We shouldn't need to do this.  The assumption is that the operands are
already simplified.

Which caller required this?

>    tem = simplify_const_unary_operation (code, mode, trueop, op_mode);
>    if (tem)
>      return tem;
> @@ -6354,6 +6358,35 @@ simplify_const_relational_operation (enum rtx_code code,
>  	return 0;
>      }
>  
> +  /* Check if the operands are a vector EQ or NE comparison.  */
> +  if (VECTOR_MODE_P (mode)
> +      && INTEGRAL_MODE_P (mode)
> +      && GET_CODE (op0) == CONST_VECTOR
> +      && GET_CODE (op1) == CONST_VECTOR
> +      && (code == EQ || code == NE))
> +    {
> +      if (rtx_equal_p (op0, op1))
> +	return code == EQ ? const_true_rtx : const0_rtx;
> +
> +      unsigned int npatterns0, npatterns1;
> +      if (CONST_VECTOR_NUNITS (op0).is_constant (&npatterns0)
> +	  && CONST_VECTOR_NUNITS (op1).is_constant (&npatterns1))
> +	{
> +	  if (npatterns0 != npatterns1)
> +	    return code == EQ ? const0_rtx : const_true_rtx;

This looks like a typing error.  The operands have to have the same
number of elements.  But...

> +
> +	  for (unsigned i = 0; i < npatterns0; i++)
> +	    {
> +	      rtx val0 = CONST_VECTOR_ELT (op0, i);
> +	      rtx val1 = CONST_VECTOR_ELT (op1, i);
> +	      if (!rtx_equal_p (val0, val1))
> +		return code == EQ ? const0_rtx : const_true_rtx;
> +	    }
> +
> +	  return code == EQ ? const_true_rtx : const0_rtx;
> +	}

...when is this loop needed?  For constant-sized vectors, isn't the
result always rtx_equal_p for EQ and !rtx_equal_p for NE?  If we have
equal vectors for which rtx_equal_p returns false then that should be
fixed.

For variable-sized vectors, I suppose the question is whether the
first unequal element is found in the minimum vector length, or whether
it only occurs for larger lengths.  In the former case we can fold at
compile time, but in the latter case we can't.

So we probably do want the loop for variable-length vectors, up to
constant_lower_bound (CONST_VECTOR_NUNITS (...)).

> +    }
> +
>    /* We can't simplify MODE_CC values since we don't know what the
>       actual comparison is.  */
>    if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
> @@ -8820,6 +8853,55 @@ test_vector_ops ()
>      }
>  }
>  
> +/* Verify vector constant comparisons for EQ and NE.  */
> +
> +static void
> +test_vector_int_const_compare (machine_mode mode)
> +{
> +  rtx zeros = CONST0_RTX (mode);
> +  rtx minusone = CONSTM1_RTX (mode);
> +  rtx series_0_1 = gen_const_vec_series (mode, const0_rtx, const1_rtx);
> +  ASSERT_RTX_EQ (const0_rtx,
> +		 simplify_const_relational_operation (EQ, mode, zeros,
> +						      CONST1_RTX (mode)));
> +  ASSERT_RTX_EQ (const_true_rtx,
> +		 simplify_const_relational_operation (EQ, mode, zeros,
> +						      CONST0_RTX (mode)));
> +  ASSERT_RTX_EQ (const_true_rtx,
> +		 simplify_const_relational_operation (EQ, mode, minusone,
> +						      CONSTM1_RTX (mode)));
> +  ASSERT_RTX_EQ (const_true_rtx,
> +		 simplify_const_relational_operation (NE, mode, zeros,
> +						      CONST1_RTX (mode)));
> +  ASSERT_RTX_EQ (const_true_rtx,
> +		 simplify_const_relational_operation (NE, mode, zeros,
> +						      series_0_1));
> +  ASSERT_RTX_EQ (const0_rtx,
> +		 simplify_const_relational_operation (EQ, mode, zeros,
> +						      series_0_1));
> +}
> +
> +/* Verify some simplifications involving vectors integer comparisons.  */
> +
> +static void
> +test_vector_int_const_compare_ops ()
> +{
> +  for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i)
> +    {
> +      machine_mode mode = (machine_mode) i;
> +      if (VECTOR_MODE_P (mode)
> +	  && INTEGRAL_MODE_P (mode)
> +	  && GET_MODE_NUNITS (mode).is_constant ())
> +	{
> +	  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
> +	      && maybe_gt (GET_MODE_NUNITS (mode), 2))
> +	    {
> +	      test_vector_int_const_compare (mode);
> +	    }
> +	}

If the above comments are right, we should be able to do this for
variable-sized vectors as well.

Thanks,
Richard

> +    }
> +}
> +
>  template<unsigned int N>
>  struct simplify_const_poly_int_tests
>  {
> @@ -8875,6 +8957,7 @@ simplify_rtx_cc_tests ()
>  {
>    test_scalar_ops ();
>    test_vector_ops ();
> +  test_vector_int_const_compare_ops ();
>    simplify_const_poly_int_tests<NUM_POLY_INT_COEFFS>::run ();
>  }
Tamar Christina Sept. 6, 2024, 1:47 p.m. UTC | #2
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Friday, September 6, 2024 2:21 PM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>
> Subject: Re: [PATCH 3/4][rtl]: simplify boolean vector EQ and NE comparisons
> 
> Tamar Christina <tamar.christina@arm.com> writes:
> > Hi All,
> >
> > This adds vector constant simplification for EQ and NE.  This is useful since
> > the vectorizer generates a lot more vector compares now, in particular NE and EQ
> > and so these help us optimize cases where the values were not known at GIMPLE
> > but instead only at RTL.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf,
> > x86_64-pc-linux-gnu -m32, -m64 and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* simplify-rtx.cc (simplify_context::simplify_unary_operation): Try
> > 	simplifying operand.
> > 	(simplify_const_relational_operation): Simplify vector EQ and NE.
> > 	(test_vector_int_const_compare): New.
> > 	(test_vector_int_const_compare_ops): New.
> > 	(simplify_rtx_cc_tests): Use them.
> >
> > ---
> >
> > diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
> > index
> a20a61c5dddbc80b23a9489d925a2c31b2163458..7e83e80246b70c81c388e77
> 967f645d171efe983 100644
> > --- a/gcc/simplify-rtx.cc
> > +++ b/gcc/simplify-rtx.cc
> > @@ -886,6 +886,10 @@ simplify_context::simplify_unary_operation (rtx_code
> code, machine_mode mode,
> >
> >    trueop = avoid_constant_pool_reference (op);
> >
> > +  /* If the operand is not a reg or constant try simplifying it first.  */
> > +  if (rtx tmp_op = simplify_rtx (op))
> > +    op = tmp_op;
> > +
> 
> We shouldn't need to do this.  The assumption is that the operands are
> already simplified.
> 
> Which caller required this?
> 
> >    tem = simplify_const_unary_operation (code, mode, trueop, op_mode);
> >    if (tem)
> >      return tem;
> > @@ -6354,6 +6358,35 @@ simplify_const_relational_operation (enum rtx_code
> code,
> >  	return 0;
> >      }
> >
> > +  /* Check if the operands are a vector EQ or NE comparison.  */
> > +  if (VECTOR_MODE_P (mode)
> > +      && INTEGRAL_MODE_P (mode)
> > +      && GET_CODE (op0) == CONST_VECTOR
> > +      && GET_CODE (op1) == CONST_VECTOR
> > +      && (code == EQ || code == NE))
> > +    {
> > +      if (rtx_equal_p (op0, op1))
> > +	return code == EQ ? const_true_rtx : const0_rtx;
> > +
> > +      unsigned int npatterns0, npatterns1;
> > +      if (CONST_VECTOR_NUNITS (op0).is_constant (&npatterns0)
> > +	  && CONST_VECTOR_NUNITS (op1).is_constant (&npatterns1))
> > +	{
> > +	  if (npatterns0 != npatterns1)
> > +	    return code == EQ ? const0_rtx : const_true_rtx;
> 
> This looks like a typing error.  The operands have to have the same
> number of elements.  But...
> 
> > +
> > +	  for (unsigned i = 0; i < npatterns0; i++)
> > +	    {
> > +	      rtx val0 = CONST_VECTOR_ELT (op0, i);
> > +	      rtx val1 = CONST_VECTOR_ELT (op1, i);
> > +	      if (!rtx_equal_p (val0, val1))
> > +		return code == EQ ? const0_rtx : const_true_rtx;
> > +	    }
> > +
> > +	  return code == EQ ? const_true_rtx : const0_rtx;
> > +	}
> 
> ...when is this loop needed?  For constant-sized vectors, isn't the
> result always rtx_equal_p for EQ and !rtx_equal_p for NE?  If we have
> equal vectors for which rtx_equal_p returns false then that should be
> fixed.

Hmm I suppose, I guess

      if (rtx_equal_p (op0, op1))
	return code == EQ ? const_true_rtx : const0_rtx;
      else
	return code == NE ? const_true_rtx : const0_rtx;

does the same thing, 

Fair, I just didn't think about it ☹

> 
> For variable-sized vectors, I suppose the question is whether the
> first unequal element is found in the minimum vector length, or whether
> it only occurs for larger lengths.  In the former case we can fold at
> compile time, but in the latter case we can't.
> 
> So we probably do want the loop for variable-length vectors, up to
> constant_lower_bound (CONST_VECTOR_NUNITS (...)).
> 
> > +    }
> > +
> >    /* We can't simplify MODE_CC values since we don't know what the
> >       actual comparison is.  */
> >    if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
> > @@ -8820,6 +8853,55 @@ test_vector_ops ()
> >      }
> >  }
> >
> > +/* Verify vector constant comparisons for EQ and NE.  */
> > +
> > +static void
> > +test_vector_int_const_compare (machine_mode mode)
> > +{
> > +  rtx zeros = CONST0_RTX (mode);
> > +  rtx minusone = CONSTM1_RTX (mode);
> > +  rtx series_0_1 = gen_const_vec_series (mode, const0_rtx, const1_rtx);
> > +  ASSERT_RTX_EQ (const0_rtx,
> > +		 simplify_const_relational_operation (EQ, mode, zeros,
> > +						      CONST1_RTX (mode)));
> > +  ASSERT_RTX_EQ (const_true_rtx,
> > +		 simplify_const_relational_operation (EQ, mode, zeros,
> > +						      CONST0_RTX (mode)));
> > +  ASSERT_RTX_EQ (const_true_rtx,
> > +		 simplify_const_relational_operation (EQ, mode, minusone,
> > +						      CONSTM1_RTX (mode)));
> > +  ASSERT_RTX_EQ (const_true_rtx,
> > +		 simplify_const_relational_operation (NE, mode, zeros,
> > +						      CONST1_RTX (mode)));
> > +  ASSERT_RTX_EQ (const_true_rtx,
> > +		 simplify_const_relational_operation (NE, mode, zeros,
> > +						      series_0_1));
> > +  ASSERT_RTX_EQ (const0_rtx,
> > +		 simplify_const_relational_operation (EQ, mode, zeros,
> > +						      series_0_1));
> > +}
> > +
> > +/* Verify some simplifications involving vectors integer comparisons.  */
> > +
> > +static void
> > +test_vector_int_const_compare_ops ()
> > +{
> > +  for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i)
> > +    {
> > +      machine_mode mode = (machine_mode) i;
> > +      if (VECTOR_MODE_P (mode)
> > +	  && INTEGRAL_MODE_P (mode)
> > +	  && GET_MODE_NUNITS (mode).is_constant ())
> > +	{
> > +	  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
> > +	      && maybe_gt (GET_MODE_NUNITS (mode), 2))
> > +	    {
> > +	      test_vector_int_const_compare (mode);
> > +	    }
> > +	}
> 
> If the above comments are right, we should be able to do this for
> variable-sized vectors as well.

Sure, will give it a try, thanks.

Cheers,
Tamar
> 
> Thanks,
> Richard
> 
> > +    }
> > +}
> > +
> >  template<unsigned int N>
> >  struct simplify_const_poly_int_tests
> >  {
> > @@ -8875,6 +8957,7 @@ simplify_rtx_cc_tests ()
> >  {
> >    test_scalar_ops ();
> >    test_vector_ops ();
> > +  test_vector_int_const_compare_ops ();
> >    simplify_const_poly_int_tests<NUM_POLY_INT_COEFFS>::run ();
> >  }
Tamar Christina Sept. 20, 2024, 11:58 a.m. UTC | #3
> For variable-sized vectors, I suppose the question is whether the
> first unequal element is found in the minimum vector length, or whether
> it only occurs for larger lengths.  In the former case we can fold at
> compile time, but in the latter case we can't.
> 
> So we probably do want the loop for variable-length vectors, up to
> constant_lower_bound (CONST_VECTOR_NUNITS (...)).
> 

Doesn't operand_equal already do this? it looks like the VLA handling
In same_vector_encodings_p rejects vectors that are not the same size,
which is good enough for this no? since I'm after strict equality.


Bootstrapped Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf,
x86_64-pc-linux-gnu -m32, -m64 and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* simplify-rtx.cc (simplify_context::simplify_unary_operation): Try
	simplifying operand.
	(simplify_const_relational_operation): Simplify vector EQ and NE.
	(test_vector_int_const_compare): New.
	(test_vector_ops): Use it.


-- inline copy of patch --

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index a20a61c5dddbc80b23a9489d925a2c31b2163458..8ba5864efb33ffa5d1ced99f6a7d0c73e12560d5 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -6354,6 +6354,19 @@ simplify_const_relational_operation (enum rtx_code code,
 	return 0;
     }
 
+  /* Check if the operands are a vector EQ or NE comparison.  */
+  if (VECTOR_MODE_P (mode)
+      && INTEGRAL_MODE_P (mode)
+      && GET_CODE (op0) == CONST_VECTOR
+      && GET_CODE (op1) == CONST_VECTOR
+      && (code == EQ || code == NE))
+    {
+      if (rtx_equal_p (op0, op1))
+	return code == EQ ? const_true_rtx : const0_rtx;
+      else
+	return code == NE ? const_true_rtx : const0_rtx;
+    }
+
   /* We can't simplify MODE_CC values since we don't know what the
      actual comparison is.  */
   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
@@ -8797,6 +8810,34 @@ test_vector_subregs (machine_mode inner_mode)
   test_vector_subregs_stepped (inner_mode);
 }
 
+/* Verify vector constant comparisons for EQ and NE.  */
+
+static void
+test_vector_int_const_compare (machine_mode mode)
+{
+  rtx zeros = CONST0_RTX (mode);
+  rtx minusone = CONSTM1_RTX (mode);
+  rtx series_0_1 = gen_const_vec_series (mode, const0_rtx, const1_rtx);
+  ASSERT_RTX_EQ (const0_rtx,
+		 simplify_const_relational_operation (EQ, mode, zeros,
+						      CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (const_true_rtx,
+		 simplify_const_relational_operation (EQ, mode, zeros,
+						      CONST0_RTX (mode)));
+  ASSERT_RTX_EQ (const_true_rtx,
+		 simplify_const_relational_operation (EQ, mode, minusone,
+						      CONSTM1_RTX (mode)));
+  ASSERT_RTX_EQ (const_true_rtx,
+		 simplify_const_relational_operation (NE, mode, zeros,
+						      CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (const_true_rtx,
+		 simplify_const_relational_operation (NE, mode, zeros,
+						      series_0_1));
+  ASSERT_RTX_EQ (const0_rtx,
+		 simplify_const_relational_operation (EQ, mode, zeros,
+						      series_0_1));
+}
+
 /* Verify some simplifications involving vectors.  */
 
 static void
@@ -8814,6 +8855,7 @@ test_vector_ops ()
 	    {
 	      test_vector_ops_series (mode, scalar_reg);
 	      test_vector_subregs (mode);
+	      test_vector_int_const_compare (mode);
 	    }
 	  test_vec_merge (mode);
 	}
Richard Sandiford Sept. 20, 2024, 1:09 p.m. UTC | #4
Tamar Christina <Tamar.Christina@arm.com> writes:
>> For variable-sized vectors, I suppose the question is whether the
>> first unequal element is found in the minimum vector length, or whether
>> it only occurs for larger lengths.  In the former case we can fold at
>> compile time, but in the latter case we can't.
>> 
>> So we probably do want the loop for variable-length vectors, up to
>> constant_lower_bound (CONST_VECTOR_NUNITS (...)).
>> 
>
> Doesn't operand_equal already do this? it looks like the VLA handling
> In same_vector_encodings_p rejects vectors that are not the same size,
> which is good enough for this no? since I'm after strict equality.

But what I meant is that for VLA vectors, compile-time equality is
a tristate value: yes, no, or maybe.

E.g.:

   { 0, 0, 0, 0, 0, 0, 0, 0, ... }

is not equal to

   { 0, 0, 1, 1, 1, 1, 1, 1, ... }

if the runtime VL gives more than 2 elements, but they are equal if
the runtime VL gives 2 elements.  In this case, we can't fold EQ to
false at compile time if the minimum length is 2 elements, but we can
if the minimum length is 4 elements.

Similarly:

   { 0, 0, 1, 1, 1, 1, 1, 1, ... }

is only conditionally not equal to:

   { 0, 0, 1, 1, 2, 2, 3, 3, ... }

It isn't the case that every encoded value has to be present in every
runtime vector.  E.g. the series { 0, 1, 2, ... } exists for VNx2DI
(for INDEX Z0.D, #0, #1), even though there is never a "2" element for
the minimum vector length.

Thanks,
Richard

>
>
> Bootstrapped Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf,
> x86_64-pc-linux-gnu -m32, -m64 and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* simplify-rtx.cc (simplify_context::simplify_unary_operation): Try
> 	simplifying operand.
> 	(simplify_const_relational_operation): Simplify vector EQ and NE.
> 	(test_vector_int_const_compare): New.
> 	(test_vector_ops): Use it.
>
>
> -- inline copy of patch --
>
> diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
> index a20a61c5dddbc80b23a9489d925a2c31b2163458..8ba5864efb33ffa5d1ced99f6a7d0c73e12560d5 100644
> --- a/gcc/simplify-rtx.cc
> +++ b/gcc/simplify-rtx.cc
> @@ -6354,6 +6354,19 @@ simplify_const_relational_operation (enum rtx_code code,
>  	return 0;
>      }
>  
> +  /* Check if the operands are a vector EQ or NE comparison.  */
> +  if (VECTOR_MODE_P (mode)
> +      && INTEGRAL_MODE_P (mode)
> +      && GET_CODE (op0) == CONST_VECTOR
> +      && GET_CODE (op1) == CONST_VECTOR
> +      && (code == EQ || code == NE))
> +    {
> +      if (rtx_equal_p (op0, op1))
> +	return code == EQ ? const_true_rtx : const0_rtx;
> +      else
> +	return code == NE ? const_true_rtx : const0_rtx;
> +    }
> +
>    /* We can't simplify MODE_CC values since we don't know what the
>       actual comparison is.  */
>    if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
> @@ -8797,6 +8810,34 @@ test_vector_subregs (machine_mode inner_mode)
>    test_vector_subregs_stepped (inner_mode);
>  }
>  
> +/* Verify vector constant comparisons for EQ and NE.  */
> +
> +static void
> +test_vector_int_const_compare (machine_mode mode)
> +{
> +  rtx zeros = CONST0_RTX (mode);
> +  rtx minusone = CONSTM1_RTX (mode);
> +  rtx series_0_1 = gen_const_vec_series (mode, const0_rtx, const1_rtx);
> +  ASSERT_RTX_EQ (const0_rtx,
> +		 simplify_const_relational_operation (EQ, mode, zeros,
> +						      CONST1_RTX (mode)));
> +  ASSERT_RTX_EQ (const_true_rtx,
> +		 simplify_const_relational_operation (EQ, mode, zeros,
> +						      CONST0_RTX (mode)));
> +  ASSERT_RTX_EQ (const_true_rtx,
> +		 simplify_const_relational_operation (EQ, mode, minusone,
> +						      CONSTM1_RTX (mode)));
> +  ASSERT_RTX_EQ (const_true_rtx,
> +		 simplify_const_relational_operation (NE, mode, zeros,
> +						      CONST1_RTX (mode)));
> +  ASSERT_RTX_EQ (const_true_rtx,
> +		 simplify_const_relational_operation (NE, mode, zeros,
> +						      series_0_1));
> +  ASSERT_RTX_EQ (const0_rtx,
> +		 simplify_const_relational_operation (EQ, mode, zeros,
> +						      series_0_1));
> +}
> +
>  /* Verify some simplifications involving vectors.  */
>  
>  static void
> @@ -8814,6 +8855,7 @@ test_vector_ops ()
>  	    {
>  	      test_vector_ops_series (mode, scalar_reg);
>  	      test_vector_subregs (mode);
> +	      test_vector_int_const_compare (mode);
>  	    }
>  	  test_vec_merge (mode);
>  	}
Tamar Christina Sept. 20, 2024, 1:37 p.m. UTC | #5
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Friday, September 20, 2024 2:10 PM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>
> Subject: Re: [PATCH 3/4][rtl]: simplify boolean vector EQ and NE comparisons
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> For variable-sized vectors, I suppose the question is whether the
> >> first unequal element is found in the minimum vector length, or whether
> >> it only occurs for larger lengths.  In the former case we can fold at
> >> compile time, but in the latter case we can't.
> >>
> >> So we probably do want the loop for variable-length vectors, up to
> >> constant_lower_bound (CONST_VECTOR_NUNITS (...)).
> >>
> >
> > Doesn't operand_equal already do this? it looks like the VLA handling
> > In same_vector_encodings_p rejects vectors that are not the same size,
> > which is good enough for this no? since I'm after strict equality.
> 
> But what I meant is that for VLA vectors, compile-time equality is
> a tristate value: yes, no, or maybe.
> 
> E.g.:
> 
>    { 0, 0, 0, 0, 0, 0, 0, 0, ... }
> 
> is not equal to
> 
>    { 0, 0, 1, 1, 1, 1, 1, 1, ... }
> 
> if the runtime VL gives more than 2 elements, but they are equal if
> the runtime VL gives 2 elements.  In this case, we can't fold EQ to
> false at compile time if the minimum length is 2 elements, but we can
> if the minimum length is 4 elements.
> 
> Similarly:
> 
>    { 0, 0, 1, 1, 1, 1, 1, 1, ... }
> 
> is only conditionally not equal to:
> 
>    { 0, 0, 1, 1, 2, 2, 3, 3, ... }
> 
> It isn't the case that every encoded value has to be present in every
> runtime vector.  E.g. the series { 0, 1, 2, ... } exists for VNx2DI
> (for INDEX Z0.D, #0, #1), even though there is never a "2" element for
> the minimum vector length.

Ah ok... so if I understand correctly, VLA series aren't capped by the VL
(e.g. representable values) In RTL but represent the base + step only.
So the series for a VNx2DI and a VNx4SI are the same but what the
usable bits are is determined by the mode/VL?

That's really not how I thought they were represented but get why you
want a loop now...

Tamar.

> 
> Thanks,
> Richard
> 
> >
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf,
> > x86_64-pc-linux-gnu -m32, -m64 and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* simplify-rtx.cc (simplify_context::simplify_unary_operation): Try
> > 	simplifying operand.
> > 	(simplify_const_relational_operation): Simplify vector EQ and NE.
> > 	(test_vector_int_const_compare): New.
> > 	(test_vector_ops): Use it.
> >
> >
> > -- inline copy of patch --
> >
> > diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
> > index
> a20a61c5dddbc80b23a9489d925a2c31b2163458..8ba5864efb33ffa5d1ced99f
> 6a7d0c73e12560d5 100644
> > --- a/gcc/simplify-rtx.cc
> > +++ b/gcc/simplify-rtx.cc
> > @@ -6354,6 +6354,19 @@ simplify_const_relational_operation (enum rtx_code
> code,
> >  	return 0;
> >      }
> >
> > +  /* Check if the operands are a vector EQ or NE comparison.  */
> > +  if (VECTOR_MODE_P (mode)
> > +      && INTEGRAL_MODE_P (mode)
> > +      && GET_CODE (op0) == CONST_VECTOR
> > +      && GET_CODE (op1) == CONST_VECTOR
> > +      && (code == EQ || code == NE))
> > +    {
> > +      if (rtx_equal_p (op0, op1))
> > +	return code == EQ ? const_true_rtx : const0_rtx;
> > +      else
> > +	return code == NE ? const_true_rtx : const0_rtx;
> > +    }
> > +
> >    /* We can't simplify MODE_CC values since we don't know what the
> >       actual comparison is.  */
> >    if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
> > @@ -8797,6 +8810,34 @@ test_vector_subregs (machine_mode inner_mode)
> >    test_vector_subregs_stepped (inner_mode);
> >  }
> >
> > +/* Verify vector constant comparisons for EQ and NE.  */
> > +
> > +static void
> > +test_vector_int_const_compare (machine_mode mode)
> > +{
> > +  rtx zeros = CONST0_RTX (mode);
> > +  rtx minusone = CONSTM1_RTX (mode);
> > +  rtx series_0_1 = gen_const_vec_series (mode, const0_rtx, const1_rtx);
> > +  ASSERT_RTX_EQ (const0_rtx,
> > +		 simplify_const_relational_operation (EQ, mode, zeros,
> > +						      CONST1_RTX (mode)));
> > +  ASSERT_RTX_EQ (const_true_rtx,
> > +		 simplify_const_relational_operation (EQ, mode, zeros,
> > +						      CONST0_RTX (mode)));
> > +  ASSERT_RTX_EQ (const_true_rtx,
> > +		 simplify_const_relational_operation (EQ, mode, minusone,
> > +						      CONSTM1_RTX (mode)));
> > +  ASSERT_RTX_EQ (const_true_rtx,
> > +		 simplify_const_relational_operation (NE, mode, zeros,
> > +						      CONST1_RTX (mode)));
> > +  ASSERT_RTX_EQ (const_true_rtx,
> > +		 simplify_const_relational_operation (NE, mode, zeros,
> > +						      series_0_1));
> > +  ASSERT_RTX_EQ (const0_rtx,
> > +		 simplify_const_relational_operation (EQ, mode, zeros,
> > +						      series_0_1));
> > +}
> > +
> >  /* Verify some simplifications involving vectors.  */
> >
> >  static void
> > @@ -8814,6 +8855,7 @@ test_vector_ops ()
> >  	    {
> >  	      test_vector_ops_series (mode, scalar_reg);
> >  	      test_vector_subregs (mode);
> > +	      test_vector_int_const_compare (mode);
> >  	    }
> >  	  test_vec_merge (mode);
> >  	}
Richard Sandiford Sept. 20, 2024, 2:12 p.m. UTC | #6
Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Friday, September 20, 2024 2:10 PM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>
>> Subject: Re: [PATCH 3/4][rtl]: simplify boolean vector EQ and NE comparisons
>> 
>> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> For variable-sized vectors, I suppose the question is whether the
>> >> first unequal element is found in the minimum vector length, or whether
>> >> it only occurs for larger lengths.  In the former case we can fold at
>> >> compile time, but in the latter case we can't.
>> >>
>> >> So we probably do want the loop for variable-length vectors, up to
>> >> constant_lower_bound (CONST_VECTOR_NUNITS (...)).
>> >>
>> >
>> > Doesn't operand_equal already do this? it looks like the VLA handling
>> > In same_vector_encodings_p rejects vectors that are not the same size,
>> > which is good enough for this no? since I'm after strict equality.
>> 
>> But what I meant is that for VLA vectors, compile-time equality is
>> a tristate value: yes, no, or maybe.
>> 
>> E.g.:
>> 
>>    { 0, 0, 0, 0, 0, 0, 0, 0, ... }
>> 
>> is not equal to
>> 
>>    { 0, 0, 1, 1, 1, 1, 1, 1, ... }
>> 
>> if the runtime VL gives more than 2 elements, but they are equal if
>> the runtime VL gives 2 elements.  In this case, we can't fold EQ to
>> false at compile time if the minimum length is 2 elements, but we can
>> if the minimum length is 4 elements.
>> 
>> Similarly:
>> 
>>    { 0, 0, 1, 1, 1, 1, 1, 1, ... }
>> 
>> is only conditionally not equal to:
>> 
>>    { 0, 0, 1, 1, 2, 2, 3, 3, ... }
>> 
>> It isn't the case that every encoded value has to be present in every
>> runtime vector.  E.g. the series { 0, 1, 2, ... } exists for VNx2DI
>> (for INDEX Z0.D, #0, #1), even though there is never a "2" element for
>> the minimum vector length.
>
> Ah ok... so if I understand correctly, VLA series aren't capped by the VL
> (e.g. representable values) In RTL but represent the base + step only.
> So the series for a VNx2DI and a VNx4SI are the same but what the
> usable bits are is determined by the mode/VL?

Right.  For VLA vectors, the vector constant encoding represents an
infinite series and the runtime VL decides how many elements to take
from the series.  It's a little more complex than just base + step,
since we allow a leading fixed-length sequence of "arbitrary" values
followed by a series of "regular" values.

But yeah, the "regular" part of the series consists of interleaved linear
series.  And some runtime VL might only take the leading "arbitrary"
elements, without digging in to the "regular" part.  Or they might
take the base elements without getting as far as using the steps.

Thanks,
Richard

>
> That's really not how I thought they were represented but get why you
> want a loop now...
>
> Tamar.
diff mbox series

Patch

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index a20a61c5dddbc80b23a9489d925a2c31b2163458..7e83e80246b70c81c388e77967f645d171efe983 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -886,6 +886,10 @@  simplify_context::simplify_unary_operation (rtx_code code, machine_mode mode,
 
   trueop = avoid_constant_pool_reference (op);
 
+  /* If the operand is not a reg or constant try simplifying it first.  */
+  if (rtx tmp_op = simplify_rtx (op))
+    op = tmp_op;
+
   tem = simplify_const_unary_operation (code, mode, trueop, op_mode);
   if (tem)
     return tem;
@@ -6354,6 +6358,35 @@  simplify_const_relational_operation (enum rtx_code code,
 	return 0;
     }
 
+  /* Check if the operands are a vector EQ or NE comparison.  */
+  if (VECTOR_MODE_P (mode)
+      && INTEGRAL_MODE_P (mode)
+      && GET_CODE (op0) == CONST_VECTOR
+      && GET_CODE (op1) == CONST_VECTOR
+      && (code == EQ || code == NE))
+    {
+      if (rtx_equal_p (op0, op1))
+	return code == EQ ? const_true_rtx : const0_rtx;
+
+      unsigned int npatterns0, npatterns1;
+      if (CONST_VECTOR_NUNITS (op0).is_constant (&npatterns0)
+	  && CONST_VECTOR_NUNITS (op1).is_constant (&npatterns1))
+	{
+	  if (npatterns0 != npatterns1)
+	    return code == EQ ? const0_rtx : const_true_rtx;
+
+	  for (unsigned i = 0; i < npatterns0; i++)
+	    {
+	      rtx val0 = CONST_VECTOR_ELT (op0, i);
+	      rtx val1 = CONST_VECTOR_ELT (op1, i);
+	      if (!rtx_equal_p (val0, val1))
+		return code == EQ ? const0_rtx : const_true_rtx;
+	    }
+
+	  return code == EQ ? const_true_rtx : const0_rtx;
+	}
+    }
+
   /* We can't simplify MODE_CC values since we don't know what the
      actual comparison is.  */
   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
@@ -8820,6 +8853,55 @@  test_vector_ops ()
     }
 }
 
+/* Verify vector constant comparisons for EQ and NE.  */
+
+static void
+test_vector_int_const_compare (machine_mode mode)
+{
+  rtx zeros = CONST0_RTX (mode);
+  rtx minusone = CONSTM1_RTX (mode);
+  rtx series_0_1 = gen_const_vec_series (mode, const0_rtx, const1_rtx);
+  ASSERT_RTX_EQ (const0_rtx,
+		 simplify_const_relational_operation (EQ, mode, zeros,
+						      CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (const_true_rtx,
+		 simplify_const_relational_operation (EQ, mode, zeros,
+						      CONST0_RTX (mode)));
+  ASSERT_RTX_EQ (const_true_rtx,
+		 simplify_const_relational_operation (EQ, mode, minusone,
+						      CONSTM1_RTX (mode)));
+  ASSERT_RTX_EQ (const_true_rtx,
+		 simplify_const_relational_operation (NE, mode, zeros,
+						      CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (const_true_rtx,
+		 simplify_const_relational_operation (NE, mode, zeros,
+						      series_0_1));
+  ASSERT_RTX_EQ (const0_rtx,
+		 simplify_const_relational_operation (EQ, mode, zeros,
+						      series_0_1));
+}
+
+/* Verify some simplifications involving vectors integer comparisons.  */
+
+static void
+test_vector_int_const_compare_ops ()
+{
+  for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i)
+    {
+      machine_mode mode = (machine_mode) i;
+      if (VECTOR_MODE_P (mode)
+	  && INTEGRAL_MODE_P (mode)
+	  && GET_MODE_NUNITS (mode).is_constant ())
+	{
+	  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+	      && maybe_gt (GET_MODE_NUNITS (mode), 2))
+	    {
+	      test_vector_int_const_compare (mode);
+	    }
+	}
+    }
+}
+
 template<unsigned int N>
 struct simplify_const_poly_int_tests
 {
@@ -8875,6 +8957,7 @@  simplify_rtx_cc_tests ()
 {
   test_scalar_ops ();
   test_vector_ops ();
+  test_vector_int_const_compare_ops ();
   simplify_const_poly_int_tests<NUM_POLY_INT_COEFFS>::run ();
 }