diff mbox series

bitint: Fix up lower_addsub_overflow [PR115352]

Message ID ZmK2VzGvG6dAcEHy@tucnak
State New
Headers show
Series bitint: Fix up lower_addsub_overflow [PR115352] | expand

Commit Message

Jakub Jelinek June 7, 2024, 7:27 a.m. UTC
Hi!

The following testcase is miscompiled because of a flawed optimization.
If one changes the 65 in the testcase to e.g. 66, one gets:
...
  _25 = .USUBC (0, _24, _14);
  _12 = IMAGPART_EXPR <_25>;
  _26 = REALPART_EXPR <_25>;
  if (_23 >= 1)
    goto <bb 8>; [80.00%]
  else
    goto <bb 11>; [20.00%]

  <bb 8> :
  if (_23 != 1)
    goto <bb 10>; [80.00%]
  else
    goto <bb 9>; [20.00%]

  <bb 9> :
  _27 = (signed long) _26;
  _28 = _27 >> 1;
  _29 = (unsigned long) _28;
  _31 = _29 + 1;
  _30 = _31 > 1;
  goto <bb 11>; [100.00%]

  <bb 10> :
  _32 = _26 != _18;
  _33 = _22 | _32;

  <bb 11> :
  # _17 = PHI <_30(9), _22(7), _33(10)>
  # _19 = PHI <_29(9), _18(7), _18(10)>
...
so there is one path for limbs below the boundary (in this case there are
actually no limbs there, maybe we could consider optimizing that further,
say with simply folding that _23 >= 1 condition to 1 == 1 and letting
cfg cleanup handle it), another case where it is exactly the limb on the
boundary (that is the bb 9 handling where it extracts the interesting
bits (the first 3 statements) and then checks if it is zero or all ones and
finally the case of limbs above that where it compares the current result
limb against the previously recorded 0 or all ones and ors differences into
accumulated result.

Now, the optimization which the first hunk removes was based on the idea
that for that case the extraction of the interesting bits from the limb
don't need anything special, so the _27/_28/_29 statements above aren't
needed, the whole limb is interesting bits, so it handled the >= 1
case like the bb 9 above without the first 3 statements and bb 10 wasn't
there at all.  There are 2 problems with that, for the higher limbs it
only checks if the the result limb bits are all zeros or all ones, but
doesn't check if they are the same as the other extension bits, and
it forgets the previous flag whether there was an overflow.
First I wanted to fix it just by adding the _33 = _22 | _30; statement
to the end of bb 9 above, which fixed the originally filed huge testcase
and the first 2 foo calls in the testcase included in the patch, it no
longer forgets about previously checked differences from 0/1.
But as the last 2 foo calls show, it still didn't check whether each
even (or each odd depending on the exact position) result limb is
equal to the first one, so every second limb it could choose some other
0 vs. all ones value and as long as it repeated in another limb above it
it would be ok.

So, the optimization just can't work properly and the following patch
removes it.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/14.2?

2024-06-07  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/115352
	* gimple-lower-bitint.cc (lower_addsub_overflow): Don't disable
	single_comparison if cmp_code is GE_EXPR.

	* gcc.dg/torture/bitint-71.c: New test.


	Jakub

Comments

Richard Biener June 7, 2024, 7:50 a.m. UTC | #1
On Fri, 7 Jun 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following testcase is miscompiled because of a flawed optimization.
> If one changes the 65 in the testcase to e.g. 66, one gets:
> ...
>   _25 = .USUBC (0, _24, _14);
>   _12 = IMAGPART_EXPR <_25>;
>   _26 = REALPART_EXPR <_25>;
>   if (_23 >= 1)
>     goto <bb 8>; [80.00%]
>   else
>     goto <bb 11>; [20.00%]
> 
>   <bb 8> :
>   if (_23 != 1)
>     goto <bb 10>; [80.00%]
>   else
>     goto <bb 9>; [20.00%]
> 
>   <bb 9> :
>   _27 = (signed long) _26;
>   _28 = _27 >> 1;
>   _29 = (unsigned long) _28;
>   _31 = _29 + 1;
>   _30 = _31 > 1;
>   goto <bb 11>; [100.00%]
> 
>   <bb 10> :
>   _32 = _26 != _18;
>   _33 = _22 | _32;
> 
>   <bb 11> :
>   # _17 = PHI <_30(9), _22(7), _33(10)>
>   # _19 = PHI <_29(9), _18(7), _18(10)>
> ...
> so there is one path for limbs below the boundary (in this case there are
> actually no limbs there, maybe we could consider optimizing that further,
> say with simply folding that _23 >= 1 condition to 1 == 1 and letting
> cfg cleanup handle it), another case where it is exactly the limb on the
> boundary (that is the bb 9 handling where it extracts the interesting
> bits (the first 3 statements) and then checks if it is zero or all ones and
> finally the case of limbs above that where it compares the current result
> limb against the previously recorded 0 or all ones and ors differences into
> accumulated result.
> 
> Now, the optimization which the first hunk removes was based on the idea
> that for that case the extraction of the interesting bits from the limb
> don't need anything special, so the _27/_28/_29 statements above aren't
> needed, the whole limb is interesting bits, so it handled the >= 1
> case like the bb 9 above without the first 3 statements and bb 10 wasn't
> there at all.  There are 2 problems with that, for the higher limbs it
> only checks if the the result limb bits are all zeros or all ones, but
> doesn't check if they are the same as the other extension bits, and
> it forgets the previous flag whether there was an overflow.
> First I wanted to fix it just by adding the _33 = _22 | _30; statement
> to the end of bb 9 above, which fixed the originally filed huge testcase
> and the first 2 foo calls in the testcase included in the patch, it no
> longer forgets about previously checked differences from 0/1.
> But as the last 2 foo calls show, it still didn't check whether each
> even (or each odd depending on the exact position) result limb is
> equal to the first one, so every second limb it could choose some other
> 0 vs. all ones value and as long as it repeated in another limb above it
> it would be ok.
> 
> So, the optimization just can't work properly and the following patch
> removes it.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/14.2?

OK.

Richard.

> 2024-06-07  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR middle-end/115352
> 	* gimple-lower-bitint.cc (lower_addsub_overflow): Don't disable
> 	single_comparison if cmp_code is GE_EXPR.
> 
> 	* gcc.dg/torture/bitint-71.c: New test.
> 
> --- gcc/gimple-lower-bitint.cc.jj	2024-04-12 10:59:48.233153262 +0200
> +++ gcc/gimple-lower-bitint.cc	2024-06-06 12:06:57.065717651 +0200
> @@ -4286,11 +4286,7 @@ bitint_large_huge::lower_addsub_overflow
>  		  bool single_comparison
>  		    = (startlimb + 2 >= fin || (startlimb & 1) != (i & 1));
>  		  if (!single_comparison)
> -		    {
> -		      cmp_code = GE_EXPR;
> -		      if (!check_zero && (start % limb_prec) == 0)
> -			single_comparison = true;
> -		    }
> +		    cmp_code = GE_EXPR;
>  		  else if ((startlimb & 1) == (i & 1))
>  		    cmp_code = EQ_EXPR;
>  		  else
> --- gcc/testsuite/gcc.dg/torture/bitint-71.c.jj	2024-06-06 12:20:55.824913276 +0200
> +++ gcc/testsuite/gcc.dg/torture/bitint-71.c	2024-06-06 12:20:45.260044338 +0200
> @@ -0,0 +1,28 @@
> +/* PR middle-end/115352 */
> +/* { dg-do run { target bitint } } */
> +/* { dg-options "-std=c23" } */
> +/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
> +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
> +
> +#if __BITINT_MAXWIDTH__ >= 385
> +int
> +foo (_BitInt (385) b)
> +{
> +  return __builtin_sub_overflow_p (0, b, (_BitInt (65)) 0);
> +}
> +#endif
> +
> +int
> +main ()
> +{
> +#if __BITINT_MAXWIDTH__ >= 385
> +  if (!foo (-(_BitInt (385)) 0x00000000000000000c377e8a3fd1881fff035bb487a51c9ed1f7350befa7ec4450000000000000000a3cf8d1ebb723981wb))
> +    __builtin_abort ();
> +  if (!foo (-0x1ffffffffffffffffc377e8a3fd1881fff035bb487a51c9ed1f7350befa7ec445ffffffffffffffffa3cf8d1ebb723981uwb))
> +    __builtin_abort ();
> +  if (!foo (-(_BitInt (385)) 0x00000000000000000ffffffffffffffffffffffffffffffff00000000000000000000000000000000a3cf8d1ebb723981wb))
> +    __builtin_abort ();
> +  if (!foo (-0x1ffffffffffffffff00000000000000000000000000000000ffffffffffffffffffffffffffffffffa3cf8d1ebb723981uwb))
> +    __builtin_abort ();
> +#endif
> +}
> 
> 	Jakub
> 
>
diff mbox series

Patch

--- gcc/gimple-lower-bitint.cc.jj	2024-04-12 10:59:48.233153262 +0200
+++ gcc/gimple-lower-bitint.cc	2024-06-06 12:06:57.065717651 +0200
@@ -4286,11 +4286,7 @@  bitint_large_huge::lower_addsub_overflow
 		  bool single_comparison
 		    = (startlimb + 2 >= fin || (startlimb & 1) != (i & 1));
 		  if (!single_comparison)
-		    {
-		      cmp_code = GE_EXPR;
-		      if (!check_zero && (start % limb_prec) == 0)
-			single_comparison = true;
-		    }
+		    cmp_code = GE_EXPR;
 		  else if ((startlimb & 1) == (i & 1))
 		    cmp_code = EQ_EXPR;
 		  else
--- gcc/testsuite/gcc.dg/torture/bitint-71.c.jj	2024-06-06 12:20:55.824913276 +0200
+++ gcc/testsuite/gcc.dg/torture/bitint-71.c	2024-06-06 12:20:45.260044338 +0200
@@ -0,0 +1,28 @@ 
+/* PR middle-end/115352 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c23" } */
+/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 385
+int
+foo (_BitInt (385) b)
+{
+  return __builtin_sub_overflow_p (0, b, (_BitInt (65)) 0);
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 385
+  if (!foo (-(_BitInt (385)) 0x00000000000000000c377e8a3fd1881fff035bb487a51c9ed1f7350befa7ec4450000000000000000a3cf8d1ebb723981wb))
+    __builtin_abort ();
+  if (!foo (-0x1ffffffffffffffffc377e8a3fd1881fff035bb487a51c9ed1f7350befa7ec445ffffffffffffffffa3cf8d1ebb723981uwb))
+    __builtin_abort ();
+  if (!foo (-(_BitInt (385)) 0x00000000000000000ffffffffffffffffffffffffffffffff00000000000000000000000000000000a3cf8d1ebb723981wb))
+    __builtin_abort ();
+  if (!foo (-0x1ffffffffffffffff00000000000000000000000000000000ffffffffffffffffffffffffffffffffa3cf8d1ebb723981uwb))
+    __builtin_abort ();
+#endif
+}