diff mbox

PR rtl-optimization/53352

Message ID 4FB2B0F1.40601@codesourcery.com
State New
Headers show

Commit Message

Meador Inge May 15, 2012, 7:39 p.m. UTC
Hi All,

As reported in PR rtl-optimization/53352 CSE currently trips up on a
paradoxical subreg case.  When compiling for ARM GNU/Linux with -O3
the expanded RTL of interest looks like:

(insn 12 11 13 3 (set (reg:SI 140)
        (lshiftrt:SI (reg/v:SI 135 [ tmp1 ])
            (const_int 16 [0x10])))
     (nil))

(insn 13 12 14 3 (set (reg:QI 141)
        (subreg:QI (reg:SI 140) 0))
     (nil))

(insn 14 13 15 3 (set (reg:SI 142)
        (subreg:SI (reg:QI 141) 0))
     (nil))

(insn 15 14 16 3 (set (reg:SI 134 [ tmp1$2 ])
        (and:SI (reg:SI 142)
            (const_int 255 [0xff])))
     (nil))

...

(insn 29 28 30 3 (set (reg:SI 0 r0)
        (const_int 0 [0]))
     (nil))

after "cse1" things look like:

(insn 12 11 13 2 (set (reg:SI 140)
        (const_int 65280 [0xff00]))
     (nil))

(insn 13 12 14 2 (set (reg:QI 141)
        (subreg:QI (reg:SI 140) 0))
     (expr_list:REG_EQUAL (const_int 0 [0])
        (nil)))

;; This is *not* equal to zero because the upper
;; two bytes are undefined.
(insn 14 13 15 2 (set (reg:SI 142)
        (subreg:SI (reg:QI 141) 0))
     (expr_list:REG_EQUAL (const_int 0 [0])
        (nil)))

(insn 15 14 16 2 (set (reg:SI 134 [ tmp1$2 ])
        (reg:SI 142))
     (expr_list:REG_EQUAL (const_int 0 [0])
        (nil)))

...

(insn 29 28 30 2 (set (reg:SI 0 r0)
        (reg:SI 142))
     (expr_list:REG_EQUAL (const_int 0 [0])
        (nil)))

I believe the REG_EQUAL note on the set involving a paradoxical subreg
is incorrect.  It eventually causes 0xFF00 to be passed to the function
'foo'.

The attached patch fixes the issue by skipping the paradoxical subreg
in 'equiv_constant'.  Compiler bootstrapped for i686-pc-linux-gnu and
full GCC test runs for i686-pc-linux-gnu and arm-none-linux-gnueabi (no
regressions).  OK?  (If this is OK, then can someone commit for me.  I don't
have write access).

gcc/

2012-05-15  Meador Inge  <meadori@codesourcery.com>

	PR rtl-optimization/53352

	* cse.c (equiv_constant): Ignore paradoxical subregs.

gcc/testsuite/

2012-05-15  Meador Inge  <meadori@codesourcery.com>

	PR rtl-optimization/53352

	* gcc.dg/pr53352.c: New test.

Comments

Meador Inge May 16, 2012, 10:15 p.m. UTC | #1
I meant to CC the RTL optimization maintainers before ...

On 05/15/2012 02:39 PM, Meador Inge wrote:
> Hi All,
> 
> As reported in PR rtl-optimization/53352 CSE currently trips up on a
> paradoxical subreg case.  When compiling for ARM GNU/Linux with -O3
> the expanded RTL of interest looks like:
> 
> (insn 12 11 13 3 (set (reg:SI 140)
>         (lshiftrt:SI (reg/v:SI 135 [ tmp1 ])
>             (const_int 16 [0x10])))
>      (nil))
> 
> (insn 13 12 14 3 (set (reg:QI 141)
>         (subreg:QI (reg:SI 140) 0))
>      (nil))
> 
> (insn 14 13 15 3 (set (reg:SI 142)
>         (subreg:SI (reg:QI 141) 0))
>      (nil))
> 
> (insn 15 14 16 3 (set (reg:SI 134 [ tmp1$2 ])
>         (and:SI (reg:SI 142)
>             (const_int 255 [0xff])))
>      (nil))
> 
> ...
> 
> (insn 29 28 30 3 (set (reg:SI 0 r0)
>         (const_int 0 [0]))
>      (nil))
> 
> after "cse1" things look like:
> 
> (insn 12 11 13 2 (set (reg:SI 140)
>         (const_int 65280 [0xff00]))
>      (nil))
> 
> (insn 13 12 14 2 (set (reg:QI 141)
>         (subreg:QI (reg:SI 140) 0))
>      (expr_list:REG_EQUAL (const_int 0 [0])
>         (nil)))
> 
> ;; This is *not* equal to zero because the upper
> ;; two bytes are undefined.
> (insn 14 13 15 2 (set (reg:SI 142)
>         (subreg:SI (reg:QI 141) 0))
>      (expr_list:REG_EQUAL (const_int 0 [0])
>         (nil)))
> 
> (insn 15 14 16 2 (set (reg:SI 134 [ tmp1$2 ])
>         (reg:SI 142))
>      (expr_list:REG_EQUAL (const_int 0 [0])
>         (nil)))
> 
> ...
> 
> (insn 29 28 30 2 (set (reg:SI 0 r0)
>         (reg:SI 142))
>      (expr_list:REG_EQUAL (const_int 0 [0])
>         (nil)))
> 
> I believe the REG_EQUAL note on the set involving a paradoxical subreg
> is incorrect.  It eventually causes 0xFF00 to be passed to the function
> 'foo'.
> 
> The attached patch fixes the issue by skipping the paradoxical subreg
> in 'equiv_constant'.  Compiler bootstrapped for i686-pc-linux-gnu and
> full GCC test runs for i686-pc-linux-gnu and arm-none-linux-gnueabi (no
> regressions).  OK?  (If this is OK, then can someone commit for me.  I don't
> have write access).
> 
> gcc/
> 
> 2012-05-15  Meador Inge  <meadori@codesourcery.com>
> 
> 	PR rtl-optimization/53352
> 
> 	* cse.c (equiv_constant): Ignore paradoxical subregs.
> 
> gcc/testsuite/
> 
> 2012-05-15  Meador Inge  <meadori@codesourcery.com>
> 
> 	PR rtl-optimization/53352
> 
> 	* gcc.dg/pr53352.c: New test.
> 
> 
> 
> pr53352.patch
> 
> 
> Index: gcc/testsuite/gcc.dg/pr53352.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/pr53352.c	(revision 0)
> +++ gcc/testsuite/gcc.dg/pr53352.c	(revision 0)
> @@ -0,0 +1,41 @@
> +/* { dg-do run } */
> +/* { dg-options "-O1" } */
> +
> +#include <stdlib.h>
> +
> +typedef union
> +{
> +  struct
> +  {
> +    unsigned char a;
> +    unsigned char b;
> +    unsigned char c;
> +    unsigned char d;
> +  } parts;
> +  unsigned long whole;
> +} T;
> +
> +T *g_t;
> +
> +void bar (unsigned long x)
> +{
> +  if (x != 0)
> +    abort ();
> +}
> +
> +int main ()
> +{
> +  T one;
> +  T two;
> +  T tmp1, tmp2;
> +
> +  one.whole = 0xFFE0E0E0;
> +  two.whole = 0xFF000000;
> +  tmp1.parts = two.parts;
> +  tmp2.parts = one.parts;
> +  tmp2.parts.c = tmp1.parts.c;
> +  one.parts = tmp2.parts;
> +
> +  g_t = &one;
> +  bar (0);
> +}
> Index: gcc/cse.c
> ===================================================================
> --- gcc/cse.c	(revision 187470)
> +++ gcc/cse.c	(working copy)
> @@ -3786,8 +3786,11 @@ equiv_constant (rtx x)
>  	    }
>  	}
>  
> -      /* Otherwise see if we already have a constant for the inner REG.  */
> +      /* Otherwise see if we already have a constant for the inner REG.
> +	 Don't bother with paradoxical subregs because we have no way
> +	 of knowing what the upper bytes are.  */
>        if (REG_P (SUBREG_REG (x))
> +	  && (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (imode))
>  	  && (new_rtx = equiv_constant (SUBREG_REG (x))) != 0)
>          return simplify_subreg (mode, new_rtx, imode, SUBREG_BYTE (x));
>
Steven Bosscher May 16, 2012, 11:01 p.m. UTC | #2
On Thu, May 17, 2012, Meador Inge <meadori ar codesourcery dot com> wrote:
>> ;; This is *not* equal to zero because the upper
>> ;; two bytes are undefined.
>> (insn 14 13 15 2 (set (reg:SI 142)
>>         (subreg:SI (reg:QI 141) 0))
>>      (expr_list:REG_EQUAL (const_int 0 [0])
>>         (nil)))

Hmm, this is what doc/rtl.texi has to say about paradoxical subregs as rvalues:

The high-order bits of rvalues are in the following circumstances:

* subreg regs
The upper bits are defined when SUBREG_PROMOTED_VAR_P is true.
SUBREG_PROMOTED_UNSIGNED_P describes what the upper bits hold.

The way I read this, suggests your patch allow simplify_subreg on
paradoxical subregs if SUBREG_PROMOTED_VAR_P is true. But I'm not
exactly an RTL expert, and I'm not 100% sure if this part of doc
applies here. ;-)

Ciao!
Steven
Richard Sandiford May 17, 2012, 8:02 p.m. UTC | #3
Steven Bosscher <stevenb.gcc@gmail.com> writes:
> On Thu, May 17, 2012, Meador Inge <meadori ar codesourcery dot com> wrote:
>>> ;; This is *not* equal to zero because the upper
>>> ;; two bytes are undefined.
>>> (insn 14 13 15 2 (set (reg:SI 142)
>>>         (subreg:SI (reg:QI 141) 0))
>>>      (expr_list:REG_EQUAL (const_int 0 [0])
>>>         (nil)))
>
> Hmm, this is what doc/rtl.texi has to say about paradoxical subregs as rvalues:
>
> The high-order bits of rvalues are in the following circumstances:
>
> * subreg regs
> The upper bits are defined when SUBREG_PROMOTED_VAR_P is true.
> SUBREG_PROMOTED_UNSIGNED_P describes what the upper bits hold.
>
> The way I read this, suggests your patch allow simplify_subreg on
> paradoxical subregs if SUBREG_PROMOTED_VAR_P is true. But I'm not
> exactly an RTL expert, and I'm not 100% sure if this part of doc
> applies here. ;-)

Yeah, I think that's just for non-paradoxical lowpart subregs.
I.e. for (subreg:M (reg:N ...)), it's making a guarantee about
the bits that are in N but not in M.

After agonising over this for a couple of days, I think it's probably
the correct fix.  What we're doing now would be valid if the only use of
equiv_constant(x) were to substitute for x.  The name and current use
of the function obviously require equality though.

Patch is OK, thanks.  It might be worth extending fold_rtx and
equiv_constant to set a flag that says whether the returned value
is equivalent or simply one of many valid replacement values.
We'd then be able to replace uses of registers like 142 with 0,
while not replacing uses of 0 with 142.  I don't know how much it
would buy us though.  That kind of thing is probably more of a job
for fwprop.

Please add UL to the hex constants in the testcase.  Not sure whether
it matters for 16-bit int targets or not, but better safe than sorry :-)

Also, rather than:

> +      /* Otherwise see if we already have a constant for the inner REG.
> +	 Don't bother with paradoxical subregs because we have no way
> +	 of knowing what the upper bytes are.  */

how about:

      /* Otherwise see if we already have a constant for the inner REG,
	 and if that is enough to calculate an equivalent constant for
	 the subreg.  Note that the upper bits of paradoxical subregs
	 are undefined, so they cannot be said to equal anything.  */

Richard
diff mbox

Patch

Index: gcc/testsuite/gcc.dg/pr53352.c
===================================================================
--- gcc/testsuite/gcc.dg/pr53352.c	(revision 0)
+++ gcc/testsuite/gcc.dg/pr53352.c	(revision 0)
@@ -0,0 +1,41 @@ 
+/* { dg-do run } */
+/* { dg-options "-O1" } */
+
+#include <stdlib.h>
+
+typedef union
+{
+  struct
+  {
+    unsigned char a;
+    unsigned char b;
+    unsigned char c;
+    unsigned char d;
+  } parts;
+  unsigned long whole;
+} T;
+
+T *g_t;
+
+void bar (unsigned long x)
+{
+  if (x != 0)
+    abort ();
+}
+
+int main ()
+{
+  T one;
+  T two;
+  T tmp1, tmp2;
+
+  one.whole = 0xFFE0E0E0;
+  two.whole = 0xFF000000;
+  tmp1.parts = two.parts;
+  tmp2.parts = one.parts;
+  tmp2.parts.c = tmp1.parts.c;
+  one.parts = tmp2.parts;
+
+  g_t = &one;
+  bar (0);
+}
Index: gcc/cse.c
===================================================================
--- gcc/cse.c	(revision 187470)
+++ gcc/cse.c	(working copy)
@@ -3786,8 +3786,11 @@  equiv_constant (rtx x)
 	    }
 	}
 
-      /* Otherwise see if we already have a constant for the inner REG.  */
+      /* Otherwise see if we already have a constant for the inner REG.
+	 Don't bother with paradoxical subregs because we have no way
+	 of knowing what the upper bytes are.  */
       if (REG_P (SUBREG_REG (x))
+	  && (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (imode))
 	  && (new_rtx = equiv_constant (SUBREG_REG (x))) != 0)
         return simplify_subreg (mode, new_rtx, imode, SUBREG_BYTE (x));