diff mbox series

rs6000: Fix PTImode handling in power8 swap optimization pass [PR116415]

Message ID 27941701-c6f0-4b44-8482-3567213584f4@linux.ibm.com
State New
Headers show
Series rs6000: Fix PTImode handling in power8 swap optimization pass [PR116415] | expand

Commit Message

Peter Bergner Aug. 21, 2024, 1:14 p.m. UTC
Our power8 swap optimization pass has some special handling for optimizing
swaps of TImode variables.  The test case reported in bugzilla uses a call
to  __atomic_compare_exchange, which introduces a variable of PTImode and
that does not get the same treatment as TImode leading to wrong code
generation.  The simple fix is to treat PTImode identically to TImode.

This passed bootstrap and regtesting on powerpc64le-linux with no regressions.
I also confirmed the testcase is correctly not run on -m32 BE and passes
on -m64 BE.

Ok for trunk?


This is broken back to GCC 12, so ok for the releases branches after some
bake-in time on trunk?

Peter


gcc/
	PR target/116415
	* config/rs6000/rs6000-p8swap.cc (rs6000_analyze_swaps): Handle PTImode
	identically to TImode.

gcc/testsuite/
	PR target/116415
	* gcc.target/powerpc/pr116415.c: New test.

Comments

Kewen.Lin Aug. 22, 2024, 9:39 a.m. UTC | #1
Hi Peter,

on 2024/8/21 21:14, Peter Bergner wrote:
> Our power8 swap optimization pass has some special handling for optimizing
> swaps of TImode variables.  The test case reported in bugzilla uses a call
> to  __atomic_compare_exchange, which introduces a variable of PTImode and
> that does not get the same treatment as TImode leading to wrong code
> generation.  The simple fix is to treat PTImode identically to TImode.

Thanks for fixing, the fix looks good to me.  The interesting rtxes, which
are optimized by p8swap and cause this issue, have both subreg on PTImode
and vector mode, the subreg special adjustment doesn't differentiate them
and replaces the one on PTImode unexpectedly, the fix makes the pass
consider it as not optimizable.

> 
> This passed bootstrap and regtesting on powerpc64le-linux with no regressions.
> I also confirmed the testcase is correctly not run on -m32 BE and passes
> on -m64 BE.
> 
> Ok for trunk?
> 
> 
> This is broken back to GCC 12, so ok for the releases branches after some
> bake-in time on trunk?
> 
> Peter
> 
> 
> gcc/
> 	PR target/116415
> 	* config/rs6000/rs6000-p8swap.cc (rs6000_analyze_swaps): Handle PTImode
> 	identically to TImode.
> 
> gcc/testsuite/
> 	PR target/116415
> 	* gcc.target/powerpc/pr116415.c: New test.
> 
> 
> diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
> index 639f477d782..15e44bb63a6 100644
> --- a/gcc/config/rs6000/rs6000-p8swap.cc
> +++ b/gcc/config/rs6000/rs6000-p8swap.cc
> @@ -2469,10 +2469,11 @@ rs6000_analyze_swaps (function *fun)
>  		    mode = V4SImode;
>  		}
>  
> -	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
> +	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode
> +		  || mode == PTImode)

Maybe we can introduce a macro to this file like

#define TI_OR_PTI_MODE(mode) ((mode) == TImode || (mode) == PTImode)

to simplify it a bit and people would likely notice and use this if they
want to add more handling for TImode in future (then it'll naturally
consider PTImode as well).

>  		{
>  		  insn_entry[uid].is_relevant = 1;
> -		  if (mode == TImode || mode == V1TImode
> +		  if (mode == TImode || mode == PTImode || mode == V1TImode
>  		      || FLOAT128_VECTOR_P (mode))
>  		    insn_entry[uid].is_128_int = 1;
>  		  if (DF_REF_INSN_INFO (mention))
> @@ -2497,10 +2498,11 @@ rs6000_analyze_swaps (function *fun)
>  		  && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn))))
>  		mode = GET_MODE (SET_DEST (insn));
>  
> -	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
> +	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode
> +		  || mode == PTImode)
>  		{
>  		  insn_entry[uid].is_relevant = 1;
> -		  if (mode == TImode || mode == V1TImode
> +		  if (mode == TImode || mode == PTImode || mode == V1TImode
>  		      || FLOAT128_VECTOR_P (mode))
>  		    insn_entry[uid].is_128_int = 1;
>  		  if (DF_REF_INSN_INFO (mention))
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr116415.c b/gcc/testsuite/gcc.target/powerpc/pr116415.c
> new file mode 100644
> index 00000000000..5fad810ceb0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr116415.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run } */
> +/* { dg-skip-if "" { powerpc*-*-darwin* } } */

Nit: This dg-skip-if line looks not necessary as p8vector_hw excludes *-*-darwin*.

OK for trunk and all active release branches with/without these nits tweaked,
but please give others two days or so to comment, thanks!

BR,
Kewen

> +/* { dg-require-effective-target p8vector_hw } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
> +
> +/* PR 116415: Verify our Power8 swap optimization pass doesn't incorrectly swap
> +   PTImode values.  They should be handled identically to TImode values.  */
> +
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +typedef union {
> +  struct {
> +    uint64_t a;
> +    uint64_t b;
> +  } t;
> +  __uint128_t data;
> +} Value;
> +Value value, next;
> +
> +void
> +bug (Value *val, Value *nxt)
> +{
> +  for (;;) {
> +    nxt->t.a = val->t.a + 1;
> +    nxt->t.b = val->t.b + 2;
> +    if (__atomic_compare_exchange (&val->data, &val->data, &nxt->data,
> +				   0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE))
> +      break;
> +  }
> +}
> +
> +int
> +main (void)
> +{
> +  bug (&value, &next);
> +  printf ("%lu %lu\n", value.t.a, value.t.b);
> +  if (value.t.a != 1 || value.t.b != 2)
> +    abort ();
> +  return 0;
> +}
Peter Bergner Aug. 23, 2024, 1:48 a.m. UTC | #2
On 8/22/24 4:39 AM, Kewen.Lin wrote:
> on 2024/8/21 21:14, Peter Bergner wrote:
>> -	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
>> +	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode
>> +		  || mode == PTImode)
> 
> Maybe we can introduce a macro to this file like
> 
> #define TI_OR_PTI_MODE(mode) ((mode) == TImode || (mode) == PTImode)
> 
> to simplify it a bit and people would likely notice and use this if they
> want to add more handling for TImode in future (then it'll naturally
> consider PTImode as well).

I was a little surprised we didn't have that macro already.  Ok, consider
it changed with your suggestion.

I agree, there probably is code in the backend that currently handles TImode
that should probably be changed to handle both by using your new macro.




>> +/* { dg-do run } */
>> +/* { dg-skip-if "" { powerpc*-*-darwin* } } */
> 
> Nit: This dg-skip-if line looks not necessary as p8vector_hw excludes *-*-darwin*.

I borrowed the dg-skip-if from test cases that use __atomic_compare_exchange
which all ship darwin, so I added it.  It was later I added the p8vector_hw
which as you say already excludes darwin, so ok, I'll remove the dg-skip-if
of darwin.  Thanks for catching that.



> OK for trunk and all active release branches with/without these nits tweaked,
> but please give others two days or so to comment, thanks!

I'll make the suggested changes and push them to trunk when my new set of
regtests are clean.  I'll let it bake there and then push to the release
later.   Thanks!

Peter
Peter Bergner Aug. 23, 2024, 4:51 p.m. UTC | #3
On 8/22/24 8:48 PM, Peter Bergner wrote:
> On 8/22/24 4:39 AM, Kewen.Lin wrote:
>> OK for trunk and all active release branches with/without these nits tweaked,
>> but please give others two days or so to comment, thanks!
> 
> I'll make the suggested changes and push them to trunk when my new set of
> regtests are clean.  I'll let it bake there and then push to the release
> later.   Thanks!

The updated patch tested clean as expected, so I pushed it to trunk.
I'll let it sit there for a bit to let our CI testers verify it doesn't
expose any issues on our various different builds before pushing to
the release branches.  Thanks!

Peter
Segher Boessenkool Aug. 26, 2024, 3:39 p.m. UTC | #4
Hi!

On Thu, Aug 22, 2024 at 05:39:36PM +0800, Kewen.Lin wrote:
> > -	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
> > +	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode
> > +		  || mode == PTImode)
> 
> Maybe we can introduce a macro to this file like
> 
> #define TI_OR_PTI_MODE(mode) ((mode) == TImode || (mode) == PTImode)

INTEGRAL_MODE_P (mode) && MODE_UNIT_SIZE (mode) == 16  or such?

Or you might just want the check for 16, that covers all applicable
modes and nothing else, right?

The correct indentation is

	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
		  || mode == TImode
		  || mode == PTImode)

btw, or you can put the TImode and PTImode on one line if you really
have to, but don't put unalike things on the same line.

I don't like macros that you use just one or two times.  It is much
clearer if you write it out whereever you use it.

> OK for trunk and all active release branches with/without these nits tweaked,
> but please give others two days or so to comment, thanks!

Okay for trunk right now, and backports after the customary wait.  Also
okay with just the  MODE_UNIT_SIZE (mode) == 16  thing, after you tested
that of course :-)

Thanks!


Segher
Segher Boessenkool Aug. 26, 2024, 3:45 p.m. UTC | #5
Hi!

On Thu, Aug 22, 2024 at 08:48:19PM -0500, Peter Bergner wrote:
> I was a little surprised we didn't have that macro already.  Ok, consider
> it changed with your suggestion.
> 
> I agree, there probably is code in the backend that currently handles TImode
> that should probably be changed to handle both by using your new macro.

That is what mode classes are for, or just the mode size here :-)

> >> +/* { dg-do run } */
> >> +/* { dg-skip-if "" { powerpc*-*-darwin* } } */
> > 
> > Nit: This dg-skip-if line looks not necessary as p8vector_hw excludes *-*-darwin*.

Every single "skip-if darwin" is incorrect if it isn't added by Iain.
Skipping a test on some target is fine, if it would fail on that target
(but do put a comment in!), but skipping because you are scared it will
fail on some target, or you don't care about that target, or just
cargo-cult, is wrong, and encourages more wrongness.


Segher
Peter Bergner Aug. 30, 2024, 7:29 p.m. UTC | #6
On 8/26/24 10:45 AM, Segher Boessenkool wrote:
> On Thu, Aug 22, 2024 at 08:48:19PM -0500, Peter Bergner wrote:
>> I agree, there probably is code in the backend that currently handles TImode
>> that should probably be changed to handle both by using your new macro.
> 
> That is what mode classes are for, or just the mode size here :-)

I don't think mode size alone work work here, since that would include
TF/KF/IF modes.  Mode class & mode size would probably work, but I don't
think that is any simpler than the new macro or explicitly testing for
TI/PTI modes.


>>>> +/* { dg-do run } */
>>>> +/* { dg-skip-if "" { powerpc*-*-darwin* } } */
>>>
>>> Nit: This dg-skip-if line looks not necessary as p8vector_hw excludes *-*-darwin*.
> 
> Every single "skip-if darwin" is incorrect if it isn't added by Iain.
> Skipping a test on some target is fine, if it would fail on that target
> (but do put a comment in!), but skipping because you are scared it will
> fail on some target, or you don't care about that target, or just
> cargo-cult, is wrong, and encourages more wrongness.

I added it because all of the other test cases that use __atomic_compare_exchange*
have a skip-if darwin test and nothing else in the test case looked like it
wasn't supported on darwin, so i thought it must be for the __a_c_e call.
Ok, "all" of the other tests was just pr57744.c, but it's moot anyway, since as
Kewen mentioned, it was redundant due to the p8vector_hw test, so I removed it.

Peter
Peter Bergner Aug. 30, 2024, 8:05 p.m. UTC | #7
On 8/26/24 10:39 AM, Segher Boessenkool wrote:
> On Thu, Aug 22, 2024 at 05:39:36PM +0800, Kewen.Lin wrote:
>>> -	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
>>> +	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode
>>> +		  || mode == PTImode)
>>
>> Maybe we can introduce a macro to this file like
>>
>> #define TI_OR_PTI_MODE(mode) ((mode) == TImode || (mode) == PTImode)
> 
> INTEGRAL_MODE_P (mode) && MODE_UNIT_SIZE (mode) == 16  or such?

So I had already pushed the code using "((mode) == TImode || (mode) == PTImode)"
by the time you replied.  I think your suggestion would work too and I could
change it to that if you want.  That would make it future proof when we have
PPTImode, PTTImode, ...! :-)  That said, maybe the TI_OR_PTI_MODE name isn't
the best name if we will have another 128-bit integer mode in the future.
Maybe something like:

#define INT128_MODE_P(mode) (INTEGRAL_MODE_P (mode) && MODE_UNIT_SIZE (mode) == 16)

...or some such name?  Thoughts?

I'll give it a spin through bootstrap and regtesting anyway.



> Or you might just want the check for 16, that covers all applicable
> modes and nothing else, right?

IIRC, I think we only want integer modes here, so I think we need more than
just the mode size check so we don't match against TD/TF/KF/IF modes.




> I don't like macros that you use just one or two times.  It is much
> clearer if you write it out whereever you use it.

I think Kewen's thought was (and I agree) is that we have a fair amount of
code that handles TImode that should probably handle both TImode and PTImode
together, so even though we currently only have one use of the macro, the
number of uses should increase as we clean up the other areas than should
handle PTImode too.

Currently, I think the only time PTImode shows up is through use of the
__atomic* intrinsics, but when Jeevitha's patch that allows the kernel
to use an attribute to create PTImode variables so they can have higher
alignment requirements, the number of PTImode uses we see will probably
increase.


Peter
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 639f477d782..15e44bb63a6 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -2469,10 +2469,11 @@  rs6000_analyze_swaps (function *fun)
 		    mode = V4SImode;
 		}
 
-	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
+	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode
+		  || mode == PTImode)
 		{
 		  insn_entry[uid].is_relevant = 1;
-		  if (mode == TImode || mode == V1TImode
+		  if (mode == TImode || mode == PTImode || mode == V1TImode
 		      || FLOAT128_VECTOR_P (mode))
 		    insn_entry[uid].is_128_int = 1;
 		  if (DF_REF_INSN_INFO (mention))
@@ -2497,10 +2498,11 @@  rs6000_analyze_swaps (function *fun)
 		  && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn))))
 		mode = GET_MODE (SET_DEST (insn));
 
-	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
+	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode
+		  || mode == PTImode)
 		{
 		  insn_entry[uid].is_relevant = 1;
-		  if (mode == TImode || mode == V1TImode
+		  if (mode == TImode || mode == PTImode || mode == V1TImode
 		      || FLOAT128_VECTOR_P (mode))
 		    insn_entry[uid].is_128_int = 1;
 		  if (DF_REF_INSN_INFO (mention))
diff --git a/gcc/testsuite/gcc.target/powerpc/pr116415.c b/gcc/testsuite/gcc.target/powerpc/pr116415.c
new file mode 100644
index 00000000000..5fad810ceb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr116415.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+/* PR 116415: Verify our Power8 swap optimization pass doesn't incorrectly swap
+   PTImode values.  They should be handled identically to TImode values.  */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+typedef union {
+  struct {
+    uint64_t a;
+    uint64_t b;
+  } t;
+  __uint128_t data;
+} Value;
+Value value, next;
+
+void
+bug (Value *val, Value *nxt)
+{
+  for (;;) {
+    nxt->t.a = val->t.a + 1;
+    nxt->t.b = val->t.b + 2;
+    if (__atomic_compare_exchange (&val->data, &val->data, &nxt->data,
+				   0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE))
+      break;
+  }
+}
+
+int
+main (void)
+{
+  bug (&value, &next);
+  printf ("%lu %lu\n", value.t.a, value.t.b);
+  if (value.t.a != 1 || value.t.b != 2)
+    abort ();
+  return 0;
+}