diff mbox series

[V4] Optimize '(X - N * M) / N' to 'X / N - M' if valid

Message ID 20230711090413.3587421-1-guojiufu@linux.ibm.com
State New
Headers show
Series [V4] Optimize '(X - N * M) / N' to 'X / N - M' if valid | expand

Commit Message

Jiufu Guo July 11, 2023, 9:04 a.m. UTC
Hi,

Integer expression "(X - N * M) / N" can be optimized to "X / N - M"
if there is no wrap/overflow/underflow and "X - N * M" has the same
sign with "X".

Compare the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623028.html
- The APIs for checking overflow of range operation are moved to
other files: range-op and gimple-range.
- Improve the patterns with '(X + C)' for unsigned type.

Bootstrap & regtest pass on ppc64{,le} and x86_64.
Is this patch ok for trunk?

BR,
Jeff (Jiufu Guo)


	PR tree-optimization/108757

gcc/ChangeLog:

	* gimple-range.cc (arith_without_overflow_p): New function.
	(same_sign_p): New function.
	* gimple-range.h (arith_without_overflow_p): New declare.
	(same_sign_p): New declare.
	* match.pd ((X - N * M) / N): New pattern.
	((X + N * M) / N): New pattern.
	((X + C) div_rshift N): New pattern.
	* range-op.cc (plus_without_overflow_p): New function.
	(minus_without_overflow_p): New function.
	(mult_without_overflow_p): New function.
	* range-op.h (plus_without_overflow_p): New declare.
	(minus_without_overflow_p): New declare.
	(mult_without_overflow_p): New declare.
	* value-query.h (get_range): New function
	* value-range.cc (irange::nonnegative_p): New function.
	(irange::nonpositive_p): New function.
	* value-range.h (irange::nonnegative_p): New declare.
	(irange::nonpositive_p): New declare.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr108757-1.c: New test.
	* gcc.dg/pr108757-2.c: New test.
	* gcc.dg/pr108757.h: New test.

---
 gcc/gimple-range.cc               |  50 +++++++
 gcc/gimple-range.h                |   2 +
 gcc/match.pd                      |  64 ++++++++
 gcc/range-op.cc                   |  77 ++++++++++
 gcc/range-op.h                    |   4 +
 gcc/value-query.h                 |  10 ++
 gcc/value-range.cc                |  12 ++
 gcc/value-range.h                 |   2 +
 gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
 gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
 gcc/testsuite/gcc.dg/pr108757.h   | 233 ++++++++++++++++++++++++++++++
 11 files changed, 491 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
 create mode 100644 gcc/testsuite/gcc.dg/pr108757.h

Comments

Aldy Hernandez July 14, 2023, 11:27 a.m. UTC | #1
I don't know what you're trying to accomplish here, as I haven't been 
following the PR, but adding all these helper functions to the ranger 
header file seems wrong, especially since there's only one use of them. 
I see you're tweaking the irange API, adding helper functions to 
range-op (which is only for code dealing with implementing range 
operators for tree codes), etc etc.

If you need these helper functions, I suggest you put them closer to 
their uses (i.e. wherever the match.pd support machinery goes).

Aldy

On 7/11/23 11:04, Jiufu Guo wrote:
> Hi,
> 
> Integer expression "(X - N * M) / N" can be optimized to "X / N - M"
> if there is no wrap/overflow/underflow and "X - N * M" has the same
> sign with "X".
> 
> Compare the previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623028.html
> - The APIs for checking overflow of range operation are moved to
> other files: range-op and gimple-range.
> - Improve the patterns with '(X + C)' for unsigned type.
> 
> Bootstrap & regtest pass on ppc64{,le} and x86_64.
> Is this patch ok for trunk?
> 
> BR,
> Jeff (Jiufu Guo)
> 
> 
> 	PR tree-optimization/108757
> 
> gcc/ChangeLog:
> 
> 	* gimple-range.cc (arith_without_overflow_p): New function.
> 	(same_sign_p): New function.
> 	* gimple-range.h (arith_without_overflow_p): New declare.
> 	(same_sign_p): New declare.
> 	* match.pd ((X - N * M) / N): New pattern.
> 	((X + N * M) / N): New pattern.
> 	((X + C) div_rshift N): New pattern.
> 	* range-op.cc (plus_without_overflow_p): New function.
> 	(minus_without_overflow_p): New function.
> 	(mult_without_overflow_p): New function.
> 	* range-op.h (plus_without_overflow_p): New declare.
> 	(minus_without_overflow_p): New declare.
> 	(mult_without_overflow_p): New declare.
> 	* value-query.h (get_range): New function
> 	* value-range.cc (irange::nonnegative_p): New function.
> 	(irange::nonpositive_p): New function.
> 	* value-range.h (irange::nonnegative_p): New declare.
> 	(irange::nonpositive_p): New declare.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/pr108757-1.c: New test.
> 	* gcc.dg/pr108757-2.c: New test.
> 	* gcc.dg/pr108757.h: New test.
> 
> ---
>   gcc/gimple-range.cc               |  50 +++++++
>   gcc/gimple-range.h                |   2 +
>   gcc/match.pd                      |  64 ++++++++
>   gcc/range-op.cc                   |  77 ++++++++++
>   gcc/range-op.h                    |   4 +
>   gcc/value-query.h                 |  10 ++
>   gcc/value-range.cc                |  12 ++
>   gcc/value-range.h                 |   2 +
>   gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
>   gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
>   gcc/testsuite/gcc.dg/pr108757.h   | 233 ++++++++++++++++++++++++++++++
>   11 files changed, 491 insertions(+)
>   create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
>   create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
>   create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
> 
> diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
> index 01e62d3ff3901143bde33dc73c0debf41d0c0fdd..620fe32e85e5fe3847a933554fc656b2939cf02d 100644
> --- a/gcc/gimple-range.cc
> +++ b/gcc/gimple-range.cc
> @@ -926,3 +926,53 @@ assume_query::dump (FILE *f)
>       }
>     fprintf (f, "------------------------------\n");
>   }
> +
> +/* Return true if the operation "X CODE Y" in type does not overflow
> +   underflow or wrap with value range info, otherwise return false.  */
> +
> +bool
> +arith_without_overflow_p (tree_code code, tree x, tree y, tree type)
> +{
> +  gcc_assert (INTEGRAL_TYPE_P (type));
> +
> +  if (TYPE_OVERFLOW_UNDEFINED (type))
> +    return true;
> +
> +  value_range vr0;
> +  value_range vr1;
> +  if (!(get_range (vr0, x) && get_range (vr1, y)))
> +    return false;
> +
> +  switch (code)
> +    {
> +    case PLUS_EXPR:
> +      return plus_without_overflow_p (vr0, vr1, type);
> +    case MINUS_EXPR:
> +      return minus_without_overflow_p (vr0, vr1, type);
> +    case MULT_EXPR:
> +      return mult_without_overflow_p (vr0, vr1, type);
> +    default:
> +      gcc_unreachable ();
> +    }
> +
> +  return false;
> +}
> +
> +/* Return true if "X" and "Y" have the same sign or zero.  */
> +
> +bool
> +same_sign_p (tree x, tree y, tree type)
> +{
> +  gcc_assert (INTEGRAL_TYPE_P (type));
> +
> +  if (TYPE_UNSIGNED (type))
> +    return true;
> +
> +  value_range vr0;
> +  value_range vr1;
> +  if (!(get_range (vr0, x) && get_range (vr1, y)))
> +    return false;
> +
> +  return (vr0.nonnegative_p () && vr1.nonnegative_p ())
> +	 || (vr0.nonpositive_p () && vr1.nonpositive_p ());
> +}
> diff --git a/gcc/gimple-range.h b/gcc/gimple-range.h
> index 6587e4923ff44e10826a697ecced237a0ad23c88..84eac87392b642ed3305011415c804f5b319e09f 100644
> --- a/gcc/gimple-range.h
> +++ b/gcc/gimple-range.h
> @@ -101,5 +101,7 @@ protected:
>     gori_compute m_gori;
>   };
>   
> +bool arith_without_overflow_p (tree_code code, tree x, tree y, tree type);
> +bool same_sign_p (tree x, tree y, tree type);
>   
>   #endif // GCC_GIMPLE_RANGE_H
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 8543f777a28e4f39b2b2a40d0702aed88786bbb3..87e990c5b1ebbd116d7d7efdba62347d3a967cdd 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -942,6 +942,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   #endif
>      ))))
>   
> +#if GIMPLE
> +(for div (trunc_div exact_div)
> + /* Simplify (t + M*N) / N -> t / N + M.  */
> + (simplify
> +  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
> +  (if (INTEGRAL_TYPE_P (type)
> +       && arith_without_overflow_p (MULT_EXPR, @1, @2, type)
> +       && arith_without_overflow_p (PLUS_EXPR, @0, @3, type)
> +       && same_sign_p (@0, @4, type))
> +  (plus (div @0 @2) @1)))
> +
> + /* Simplify (t - M*N) / N -> t / N - M.  */
> + (simplify
> +  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
> +  (if (INTEGRAL_TYPE_P (type)
> +       && arith_without_overflow_p (MULT_EXPR, @1, @2, type)
> +       && arith_without_overflow_p (MINUS_EXPR, @0, @3, type)
> +       && same_sign_p (@0, @4, type))
> +  (minus (div @0 @2) @1))))
> +
> +/* Simplify
> +   (t + C) / N -> t / N + C / N where C is multiple of N.
> +   (t + C) >> N -> t >> N + C>>N if low N bits of C is 0.  */
> +(for op (trunc_div exact_div rshift)
> + (simplify
> +  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
> +   (with
> +    {
> +      wide_int c = wi::to_wide (@1);
> +      wide_int n = wi::to_wide (@2);
> +      bool is_rshift = op == RSHIFT_EXPR;
> +      bool neg_c = false;
> +      bool ok = false;
> +      if (INTEGRAL_TYPE_P (type))
> +        {
> +	  ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
> +			 : wi::multiple_of_p (c, n, TYPE_SIGN (type));
> +	  ok = ok && arith_without_overflow_p (PLUS_EXPR, @0, @1, type)
> +	       && same_sign_p (@0, @3, type);
> +
> +	  /* Try check 'X + C' as 'X - -C' for unsigned.  */
> +	  if (!ok && TYPE_UNSIGNED (type) && c.sign_mask () < 0)
> +	    {
> +	      neg_c = true;
> +	      c = -c;
> +	      ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
> +			     : wi::multiple_of_p (c, n, UNSIGNED);
> +	      value_range vr;
> +	      ok = ok && get_range (vr, @0)
> +		   && wi::geu_p (vr.lower_bound (), c);
> +	    }
> +	}
> +    }
> +   (if (ok)
> +   (with
> +    {
> +      wide_int m;
> +      m = is_rshift ? wi::rshift (c, n, TYPE_SIGN (type))
> +		    : wi::div_trunc (c, n, TYPE_SIGN (type));
> +      m = neg_c ? -m : m;
> +    }
> +   (plus (op @0 @2) { wide_int_to_tree(type, m); }))))))
> +#endif
> +
>   (for op (negate abs)
>    /* Simplify cos(-x) and cos(|x|) -> cos(x).  Similarly for cosh.  */
>    (for coss (COS COSH)
> diff --git a/gcc/range-op.cc b/gcc/range-op.cc
> index cb584314f4cfc93aeec50c7f888829997a7dc8eb..487e781a0cdfa39416589983a246327957c14d54 100644
> --- a/gcc/range-op.cc
> +++ b/gcc/range-op.cc
> @@ -4311,6 +4311,83 @@ range_op_table::initialize_integral_ops ()
>   
>   }
>   
> +bool
> +plus_without_overflow_p (value_range &vr0, value_range &vr1, tree type)
> +{
> +  wi::overflow_type ovf;
> +  signop sgn = TYPE_SIGN (type);
> +  wide_int wmax0 = vr0.upper_bound ();
> +  wide_int wmax1 = vr1.upper_bound ();
> +  wi::add (wmax0, wmax1, sgn, &ovf);
> +  if (ovf != wi::OVF_NONE)
> +    return false;
> +
> +  if (TYPE_UNSIGNED (type))
> +    return true;
> +
> +  wide_int wmin0 = vr0.lower_bound ();
> +  wide_int wmin1 = vr1.lower_bound ();
> +  wi::add (wmin0, wmin1, sgn, &ovf);
> +  if (ovf != wi::OVF_NONE)
> +    return false;
> +
> +  return true;
> +}
> +
> +bool
> +minus_without_overflow_p (value_range &vr0, value_range &vr1, tree type)
> +{
> +  wi::overflow_type ovf;
> +  signop sgn = TYPE_SIGN (type);
> +  wide_int wmin0 = vr0.lower_bound ();
> +  wide_int wmax1 = vr1.upper_bound ();
> +  wi::sub (wmin0, wmax1, sgn, &ovf);
> +  if (ovf != wi::OVF_NONE)
> +    return false;
> +
> +  if (TYPE_UNSIGNED (type))
> +    return true;
> +
> +  wide_int wmax0 = vr0.upper_bound ();
> +  wide_int wmin1 = vr1.lower_bound ();
> +  wi::sub (wmax0, wmin1, sgn, &ovf);
> +  if (ovf != wi::OVF_NONE)
> +    return false;
> +
> +  return true;
> +}
> +
> +bool
> +mult_without_overflow_p (value_range &vr0, value_range &vr1, tree type)
> +{
> +  wi::overflow_type ovf;
> +  signop sgn = TYPE_SIGN (type);
> +  wide_int wmax0 = vr0.upper_bound ();
> +  wide_int wmax1 = vr1.upper_bound ();
> +  wi::mul (wmax0, wmax1, sgn, &ovf);
> +  if (ovf != wi::OVF_NONE)
> +    return false;
> +
> +  if (TYPE_UNSIGNED (type))
> +    return true;
> +
> +  wide_int wmin0 = vr0.lower_bound ();
> +  wide_int wmin1 = vr1.lower_bound ();
> +  wi::mul (wmin0, wmin1, sgn, &ovf);
> +  if (ovf != wi::OVF_NONE)
> +    return false;
> +
> +  wi::mul (wmin0, wmax1, sgn, &ovf);
> +  if (ovf != wi::OVF_NONE)
> +    return false;
> +
> +  wi::mul (wmax0, wmin1, sgn, &ovf);
> +  if (ovf != wi::OVF_NONE)
> +    return false;
> +
> +  return true;
> +}
> +
>   #if CHECKING_P
>   #include "selftest.h"
>   
> diff --git a/gcc/range-op.h b/gcc/range-op.h
> index af94c2756a74f710aa50aec1ac3b3de5eeb43a8e..1697ce5464c0cc8fbfb86234137ba7cc23e10979 100644
> --- a/gcc/range-op.h
> +++ b/gcc/range-op.h
> @@ -220,6 +220,10 @@ protected:
>     range_operator *m_operator;
>   };
>   
> +extern bool plus_without_overflow_p (value_range &, value_range &, tree);
> +extern bool minus_without_overflow_p (value_range &, value_range &, tree);
> +extern bool mult_without_overflow_p (value_range &, value_range &, tree);
> +
>   // Cast the range in R to TYPE if R supports TYPE.
>   
>   inline bool
> diff --git a/gcc/value-query.h b/gcc/value-query.h
> index d10c3eac1e2e6c477cbab026942a45b4fcc2ddce..cf488c7ea9bfc423fd3de9d2c743f71175804ae2 100644
> --- a/gcc/value-query.h
> +++ b/gcc/value-query.h
> @@ -137,6 +137,16 @@ get_range_query (const struct function *fun)
>     return (fun && fun->x_range_query) ? fun->x_range_query : &global_ranges;
>   }
>   
> +/* Return true if there is range for "X" expression at "S" statement,
> +   and the range is not varying and not undefined.   */
> +
> +inline bool
> +get_range (vrange &r, tree x, gimple *s = NULL)
> +{
> +  return get_range_query (cfun)->range_of_expr (r, x, s) && !r.varying_p ()
> +	 && !r.undefined_p ();
> +}
> +
>   // Query the global range of NAME in function F.  Default to cfun.
>   extern void gimple_range_global (vrange &v, tree name,
>   				 struct function *f = cfun);
> diff --git a/gcc/value-range.cc b/gcc/value-range.cc
> index 011bdbdeae62845b5627a8dfa0d261370df7e5db..5ae4e044194326c24aab6babf8e0df3e4b313e98 100644
> --- a/gcc/value-range.cc
> +++ b/gcc/value-range.cc
> @@ -313,6 +313,18 @@ add_vrange (const vrange &v, inchash::hash &hstate,
>   
>   } //namespace inchash
>   
> +bool
> +irange::nonnegative_p () const
> +{
> +  return wi::ge_p (lower_bound (), 0, TYPE_SIGN (type ()));
> +}
> +
> +bool
> +irange::nonpositive_p () const
> +{
> +  return wi::le_p (upper_bound (), 0, TYPE_SIGN (type ()));
> +}
> +
>   bool
>   irange::supports_type_p (const_tree type) const
>   {
> diff --git a/gcc/value-range.h b/gcc/value-range.h
> index 0170188201bc9b1c4e117661c9a62819dc1547c5..a12dea514e48cdd04a3cee5dc6304637e501d399 100644
> --- a/gcc/value-range.h
> +++ b/gcc/value-range.h
> @@ -280,6 +280,8 @@ public:
>     virtual bool singleton_p (tree *result = NULL) const override;
>     bool singleton_p (wide_int &) const;
>     bool contains_p (const wide_int &) const;
> +  bool nonnegative_p () const;
> +  bool nonpositive_p () const;
>   
>     // In-place operators.
>     virtual bool union_ (const vrange &) override;
> diff --git a/gcc/testsuite/gcc.dg/pr108757-1.c b/gcc/testsuite/gcc.dg/pr108757-1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..7908f4bdcb8e225fe311b668efbe8f6db525b4d5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr108757-1.c
> @@ -0,0 +1,18 @@
> +/* PR tree-optimization/108757 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +#include <limits.h>
> +#define N 5
> +#define M 3
> +#define GAP 0
> +typedef unsigned int UINT;
> +typedef int INT;
> +#define UMAX UINT_MAX
> +#define IMAX INT_MAX
> +#define IMIN INT_MIN
> +#include "pr108757.h"
> +
> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ " "optimized" } } *
> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- " "optimized" } } */
> +/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr108757-2.c b/gcc/testsuite/gcc.dg/pr108757-2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..82bebd09944f0be2e0690c604723bfe6182acec3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr108757-2.c
> @@ -0,0 +1,19 @@
> +/* PR tree-optimization/108757 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
> +
> +#include <limits.h>
> +#define N 4
> +#define M 3
> +#define GAP 2
> +typedef unsigned int UINT;
> +typedef int INT;
> +#define UMAX UINT_MAX
> +#define IMAX INT_MAX
> +#define IMIN INT_MIN
> +#include "pr108757.h"
> +
> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3 "optimized" } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/pr108757.h b/gcc/testsuite/gcc.dg/pr108757.h
> new file mode 100644
> index 0000000000000000000000000000000000000000..5550199c1ef952f54eaa83087cec25e992057c34
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr108757.h
> @@ -0,0 +1,233 @@
> +#define NOINLINE __attribute__ ((noinline))
> +UINT NOINLINE
> +opt_u1 (UINT x)
> +{
> +  if (x < (M * N) - GAP)
> +    return 0;
> +  UINT a = x - (M * N);
> +  UINT b = a / N;
> +  return b + M;
> +}
> +
> +UINT NOINLINE
> +opt_u2 (UINT x)
> +{
> +  if (x > (UMAX - (M * N) + GAP))
> +    return 0;
> +  UINT a = x + (M * N);
> +  UINT b = a / N;
> +  return b - M;
> +}
> +
> +INT NOINLINE
> +opt_s1 (INT x)
> +{
> +  if (x < (M * N) - GAP)
> +    return 0;
> +  INT a = x - (M * N);
> +  INT b = a / N;
> +  return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s2 (INT x)
> +{
> +  if (x < IMIN + (M * N) - GAP || x > 0)
> +    return 0;
> +  INT a = x - (M * N);
> +  INT b = a / N;
> +  return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s3 (INT x)
> +{
> +  if (x < (M * N) - GAP)
> +    return 0;
> +  INT a = x - (M * N);
> +  INT b = a / -N;
> +  return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s4 (INT x)
> +{
> +  if (x < IMIN + (M * N) - GAP || x > 0)
> +    return 0;
> +  INT a = x - (M * N);
> +  INT b = a / -N;
> +  return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s5 (INT x)
> +{
> +  if (x > (-M * N) + GAP)
> +    return 0;
> +  INT a = x - (-M * N);
> +  INT b = a / N;
> +  return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s6 (INT x)
> +{
> +  if (x > IMAX - (M * N) + GAP || x < 0)
> +    return 0;
> +  INT a = x - (-M * N);
> +  INT b = a / N;
> +  return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s7 (INT x)
> +{
> +  if (x > (M * -N) + GAP)
> +    return 0;
> +  INT a = x - (M * -N);
> +  INT b = a / -N;
> +  return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s8 (INT x)
> +{
> +  if (x > IMAX - (M * N) + GAP || x < 0)
> +    return 0;
> +  INT a = x - (M * -N);
> +  INT b = a / -N;
> +  return b + M;
> +}
> +
> +UINT NOINLINE
> +opt_u3 (UINT x)
> +{
> +  if (x < (M << N) - GAP)
> +    return 0;
> +  UINT a = x - (M << N);
> +  UINT b = a >> N;
> +  return b + M;
> +}
> +
> +UINT NOINLINE
> +opt_u4 (UINT x)
> +{
> +  if (x > (UMAX - (M << N)) + GAP)
> +    return 0;
> +  UINT a = x + (M << N);
> +  UINT b = a >> N;
> +  return b - M;
> +}
> +
> +INT NOINLINE
> +opt_s9 (INT x)
> +{
> +  if (x < (M << N) - GAP)
> +    return 0;
> +  INT a = x - (M << N);
> +  INT b = a >> N;
> +  return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s10 (INT x)
> +{
> +  if (x < IMIN + (M << N) - GAP || x > 0)
> +    return 0;
> +  INT a = x - (M << N);
> +  INT b = a >> N;
> +  return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s11 (INT x)
> +{
> +  if (x > (-M << N) + GAP)
> +    return 0;
> +  INT a = x - (-M << N);
> +  INT b = a >> N;
> +  return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s12 (INT x)
> +{
> +  if (x > IMAX - (M << N) + GAP || x < 0)
> +    return 0;
> +  INT a = x - (-M << N);
> +  INT b = a >> N;
> +  return b + -M;
> +}
> +
> +UINT NOINLINE
> +opt_u5 (UINT x, UINT n, UINT m)
> +{
> +  if (n > N || m > M)
> +    return 0;
> +  if (x < (M*N) - GAP)
> +    return 0;
> +  UINT a = x - (m * n);
> +  UINT b = a / n;
> +  return b + m;
> +}
> +
> +UINT NOINLINE
> +opt_u6 (UINT x, UINT n, UINT m)
> +{
> +  if (n > N || m > M)
> +    return 0;
> +  if (x > (UMAX - M*N) + GAP)
> +    return 0;
> +  UINT a = x + (m * n);
> +  UINT b = a / n;
> +  return b - m;
> +}
> +
> +INT NOINLINE
> +opt_s13 (INT x, INT n, INT m)
> +{
> +  if (n > N || m > M || n < 0 || m < 0)
> +    return 0;
> +  if (x < (M*N) - GAP)
> +    return 0;
> +  INT a = x - (m * n);
> +  INT b = a / n;
> +  return b + m;
> +}
> +
> +INT NOINLINE
> +opt_s14 (INT x, INT n, INT m)
> +{
> +  if (n > N || m > M || n < 0 || m < 0)
> +    return 0;
> +  if (x > -M*N + GAP)
> +    return 0;
> +  INT a = x + (m * n);
> +  INT b = a / n;
> +  return b - m;
> +}
> +
> +INT
> +opt_s15 (INT x, INT n, INT m)
> +{
> +  if (n > 0 || m > 0 || n < -N || m < -M)
> +    return 0;
> +  if (x < (M*N) - GAP)
> +    return 0;
> +  INT a = x - (m * n);
> +  INT b = a / n;
> +  return b + m;
> +}
> +
> +INT NOINLINE
> +opt_s16 (INT x, INT n, INT m)
> +{
> +  if (n > 0 || m > 0 || n < -N || m < -M)
> +    return 0;
> +  if (x < 0 || x > (IMAX - M*N) + GAP)
> +    return 0;
> +  INT a = x + (m * n);
> +  INT b = a / n;
> +  return b - m;
> +}
> +
Richard Biener July 14, 2023, 1:37 p.m. UTC | #2
On Fri, 14 Jul 2023, Aldy Hernandez wrote:

> I don't know what you're trying to accomplish here, as I haven't been
> following the PR, but adding all these helper functions to the ranger header
> file seems wrong, especially since there's only one use of them. I see you're
> tweaking the irange API, adding helper functions to range-op (which is only
> for code dealing with implementing range operators for tree codes), etc etc.
> 
> If you need these helper functions, I suggest you put them closer to their
> uses (i.e. wherever the match.pd support machinery goes).

Note I suggested the opposite beacuse I thought these kind of helpers
are closer to value-range support than to match.pd.

But I take away from your answer that there's nothing close in the 
value-range machinery that answers the question whether A op B may
overflow?

Richard.

> Aldy
> 
> On 7/11/23 11:04, Jiufu Guo wrote:
> > Hi,
> > 
> > Integer expression "(X - N * M) / N" can be optimized to "X / N - M"
> > if there is no wrap/overflow/underflow and "X - N * M" has the same
> > sign with "X".
> > 
> > Compare the previous version:
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-June/623028.html
> > - The APIs for checking overflow of range operation are moved to
> > other files: range-op and gimple-range.
> > - Improve the patterns with '(X + C)' for unsigned type.
> > 
> > Bootstrap & regtest pass on ppc64{,le} and x86_64.
> > Is this patch ok for trunk?
> > 
> > BR,
> > Jeff (Jiufu Guo)
> > 
> > 
> >  PR tree-optimization/108757
> > 
> > gcc/ChangeLog:
> > 
> >  * gimple-range.cc (arith_without_overflow_p): New function.
> >  (same_sign_p): New function.
> >  * gimple-range.h (arith_without_overflow_p): New declare.
> >  (same_sign_p): New declare.
> >  * match.pd ((X - N * M) / N): New pattern.
> >  ((X + N * M) / N): New pattern.
> >  ((X + C) div_rshift N): New pattern.
> >  * range-op.cc (plus_without_overflow_p): New function.
> >  (minus_without_overflow_p): New function.
> >  (mult_without_overflow_p): New function.
> >  * range-op.h (plus_without_overflow_p): New declare.
> >  (minus_without_overflow_p): New declare.
> >  (mult_without_overflow_p): New declare.
> >  * value-query.h (get_range): New function
> >  * value-range.cc (irange::nonnegative_p): New function.
> >  (irange::nonpositive_p): New function.
> >  * value-range.h (irange::nonnegative_p): New declare.
> >  (irange::nonpositive_p): New declare.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> >  * gcc.dg/pr108757-1.c: New test.
> >  * gcc.dg/pr108757-2.c: New test.
> >  * gcc.dg/pr108757.h: New test.
> > 
> > ---
> >   gcc/gimple-range.cc               |  50 +++++++
> >   gcc/gimple-range.h                |   2 +
> >   gcc/match.pd                      |  64 ++++++++
> >   gcc/range-op.cc                   |  77 ++++++++++
> >   gcc/range-op.h                    |   4 +
> >   gcc/value-query.h                 |  10 ++
> >   gcc/value-range.cc                |  12 ++
> >   gcc/value-range.h                 |   2 +
> >   gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
> >   gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
> >   gcc/testsuite/gcc.dg/pr108757.h   | 233 ++++++++++++++++++++++++++++++
> >   11 files changed, 491 insertions(+)
> >   create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
> >   create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
> >   create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
> > 
> > diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
> > index
> > 01e62d3ff3901143bde33dc73c0debf41d0c0fdd..620fe32e85e5fe3847a933554fc656b2939cf02d
> > 100644
> > --- a/gcc/gimple-range.cc
> > +++ b/gcc/gimple-range.cc
> > @@ -926,3 +926,53 @@ assume_query::dump (FILE *f)
> >       }
> >     fprintf (f, "------------------------------\n");
> >   }
> > +
> > +/* Return true if the operation "X CODE Y" in type does not overflow
> > +   underflow or wrap with value range info, otherwise return false.  */
> > +
> > +bool
> > +arith_without_overflow_p (tree_code code, tree x, tree y, tree type)
> > +{
> > +  gcc_assert (INTEGRAL_TYPE_P (type));
> > +
> > +  if (TYPE_OVERFLOW_UNDEFINED (type))
> > +    return true;
> > +
> > +  value_range vr0;
> > +  value_range vr1;
> > +  if (!(get_range (vr0, x) && get_range (vr1, y)))
> > +    return false;
> > +
> > +  switch (code)
> > +    {
> > +    case PLUS_EXPR:
> > +      return plus_without_overflow_p (vr0, vr1, type);
> > +    case MINUS_EXPR:
> > +      return minus_without_overflow_p (vr0, vr1, type);
> > +    case MULT_EXPR:
> > +      return mult_without_overflow_p (vr0, vr1, type);
> > +    default:
> > +      gcc_unreachable ();
> > +    }
> > +
> > +  return false;
> > +}
> > +
> > +/* Return true if "X" and "Y" have the same sign or zero.  */
> > +
> > +bool
> > +same_sign_p (tree x, tree y, tree type)
> > +{
> > +  gcc_assert (INTEGRAL_TYPE_P (type));
> > +
> > +  if (TYPE_UNSIGNED (type))
> > +    return true;
> > +
> > +  value_range vr0;
> > +  value_range vr1;
> > +  if (!(get_range (vr0, x) && get_range (vr1, y)))
> > +    return false;
> > +
> > +  return (vr0.nonnegative_p () && vr1.nonnegative_p ())
> > +	 || (vr0.nonpositive_p () && vr1.nonpositive_p ());
> > +}
> > diff --git a/gcc/gimple-range.h b/gcc/gimple-range.h
> > index
> > 6587e4923ff44e10826a697ecced237a0ad23c88..84eac87392b642ed3305011415c804f5b319e09f
> > 100644
> > --- a/gcc/gimple-range.h
> > +++ b/gcc/gimple-range.h
> > @@ -101,5 +101,7 @@ protected:
> >     gori_compute m_gori;
> >   };
> >   
> > +bool arith_without_overflow_p (tree_code code, tree x, tree y, tree type);
> > +bool same_sign_p (tree x, tree y, tree type);
> >   
> >   #endif // GCC_GIMPLE_RANGE_H
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index
> > 8543f777a28e4f39b2b2a40d0702aed88786bbb3..87e990c5b1ebbd116d7d7efdba62347d3a967cdd
> > 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -942,6 +942,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   #endif
> >      ))))
> >   +#if GIMPLE
> > +(for div (trunc_div exact_div)
> > + /* Simplify (t + M*N) / N -> t / N + M.  */
> > + (simplify
> > +  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
> > +  (if (INTEGRAL_TYPE_P (type)
> > +       && arith_without_overflow_p (MULT_EXPR, @1, @2, type)
> > +       && arith_without_overflow_p (PLUS_EXPR, @0, @3, type)
> > +       && same_sign_p (@0, @4, type))
> > +  (plus (div @0 @2) @1)))
> > +
> > + /* Simplify (t - M*N) / N -> t / N - M.  */
> > + (simplify
> > +  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
> > +  (if (INTEGRAL_TYPE_P (type)
> > +       && arith_without_overflow_p (MULT_EXPR, @1, @2, type)
> > +       && arith_without_overflow_p (MINUS_EXPR, @0, @3, type)
> > +       && same_sign_p (@0, @4, type))
> > +  (minus (div @0 @2) @1))))
> > +
> > +/* Simplify
> > +   (t + C) / N -> t / N + C / N where C is multiple of N.
> > +   (t + C) >> N -> t >> N + C>>N if low N bits of C is 0.  */
> > +(for op (trunc_div exact_div rshift)
> > + (simplify
> > +  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
> > +   (with
> > +    {
> > +      wide_int c = wi::to_wide (@1);
> > +      wide_int n = wi::to_wide (@2);
> > +      bool is_rshift = op == RSHIFT_EXPR;
> > +      bool neg_c = false;
> > +      bool ok = false;
> > +      if (INTEGRAL_TYPE_P (type))
> > +        {
> > +	  ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
> > +			 : wi::multiple_of_p (c, n, TYPE_SIGN (type));
> > +	  ok = ok && arith_without_overflow_p (PLUS_EXPR, @0, @1, type)
> > +	       && same_sign_p (@0, @3, type);
> > +
> > +	  /* Try check 'X + C' as 'X - -C' for unsigned.  */
> > +	  if (!ok && TYPE_UNSIGNED (type) && c.sign_mask () < 0)
> > +	    {
> > +	      neg_c = true;
> > +	      c = -c;
> > +	      ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
> > +			     : wi::multiple_of_p (c, n, UNSIGNED);
> > +	      value_range vr;
> > +	      ok = ok && get_range (vr, @0)
> > +		   && wi::geu_p (vr.lower_bound (), c);
> > +	    }
> > +	}
> > +    }
> > +   (if (ok)
> > +   (with
> > +    {
> > +      wide_int m;
> > +      m = is_rshift ? wi::rshift (c, n, TYPE_SIGN (type))
> > +		    : wi::div_trunc (c, n, TYPE_SIGN (type));
> > +      m = neg_c ? -m : m;
> > +    }
> > +   (plus (op @0 @2) { wide_int_to_tree(type, m); }))))))
> > +#endif
> > +
> >   (for op (negate abs)
> >    /* Simplify cos(-x) and cos(|x|) -> cos(x).  Similarly for cosh.  */
> >    (for coss (COS COSH)
> > diff --git a/gcc/range-op.cc b/gcc/range-op.cc
> > index
> > cb584314f4cfc93aeec50c7f888829997a7dc8eb..487e781a0cdfa39416589983a246327957c14d54
> > 100644
> > --- a/gcc/range-op.cc
> > +++ b/gcc/range-op.cc
> > @@ -4311,6 +4311,83 @@ range_op_table::initialize_integral_ops ()
> >   
> >   }
> >   
> > +bool
> > +plus_without_overflow_p (value_range &vr0, value_range &vr1, tree type)
> > +{
> > +  wi::overflow_type ovf;
> > +  signop sgn = TYPE_SIGN (type);
> > +  wide_int wmax0 = vr0.upper_bound ();
> > +  wide_int wmax1 = vr1.upper_bound ();
> > +  wi::add (wmax0, wmax1, sgn, &ovf);
> > +  if (ovf != wi::OVF_NONE)
> > +    return false;
> > +
> > +  if (TYPE_UNSIGNED (type))
> > +    return true;
> > +
> > +  wide_int wmin0 = vr0.lower_bound ();
> > +  wide_int wmin1 = vr1.lower_bound ();
> > +  wi::add (wmin0, wmin1, sgn, &ovf);
> > +  if (ovf != wi::OVF_NONE)
> > +    return false;
> > +
> > +  return true;
> > +}
> > +
> > +bool
> > +minus_without_overflow_p (value_range &vr0, value_range &vr1, tree type)
> > +{
> > +  wi::overflow_type ovf;
> > +  signop sgn = TYPE_SIGN (type);
> > +  wide_int wmin0 = vr0.lower_bound ();
> > +  wide_int wmax1 = vr1.upper_bound ();
> > +  wi::sub (wmin0, wmax1, sgn, &ovf);
> > +  if (ovf != wi::OVF_NONE)
> > +    return false;
> > +
> > +  if (TYPE_UNSIGNED (type))
> > +    return true;
> > +
> > +  wide_int wmax0 = vr0.upper_bound ();
> > +  wide_int wmin1 = vr1.lower_bound ();
> > +  wi::sub (wmax0, wmin1, sgn, &ovf);
> > +  if (ovf != wi::OVF_NONE)
> > +    return false;
> > +
> > +  return true;
> > +}
> > +
> > +bool
> > +mult_without_overflow_p (value_range &vr0, value_range &vr1, tree type)
> > +{
> > +  wi::overflow_type ovf;
> > +  signop sgn = TYPE_SIGN (type);
> > +  wide_int wmax0 = vr0.upper_bound ();
> > +  wide_int wmax1 = vr1.upper_bound ();
> > +  wi::mul (wmax0, wmax1, sgn, &ovf);
> > +  if (ovf != wi::OVF_NONE)
> > +    return false;
> > +
> > +  if (TYPE_UNSIGNED (type))
> > +    return true;
> > +
> > +  wide_int wmin0 = vr0.lower_bound ();
> > +  wide_int wmin1 = vr1.lower_bound ();
> > +  wi::mul (wmin0, wmin1, sgn, &ovf);
> > +  if (ovf != wi::OVF_NONE)
> > +    return false;
> > +
> > +  wi::mul (wmin0, wmax1, sgn, &ovf);
> > +  if (ovf != wi::OVF_NONE)
> > +    return false;
> > +
> > +  wi::mul (wmax0, wmin1, sgn, &ovf);
> > +  if (ovf != wi::OVF_NONE)
> > +    return false;
> > +
> > +  return true;
> > +}
> > +
> >   #if CHECKING_P
> >   #include "selftest.h"
> >   
> > diff --git a/gcc/range-op.h b/gcc/range-op.h
> > index
> > af94c2756a74f710aa50aec1ac3b3de5eeb43a8e..1697ce5464c0cc8fbfb86234137ba7cc23e10979
> > 100644
> > --- a/gcc/range-op.h
> > +++ b/gcc/range-op.h
> > @@ -220,6 +220,10 @@ protected:
> >     range_operator *m_operator;
> >   };
> >   
> > +extern bool plus_without_overflow_p (value_range &, value_range &, tree);
> > +extern bool minus_without_overflow_p (value_range &, value_range &, tree);
> > +extern bool mult_without_overflow_p (value_range &, value_range &, tree);
> > +
> >   // Cast the range in R to TYPE if R supports TYPE.
> >   
> >   inline bool
> > diff --git a/gcc/value-query.h b/gcc/value-query.h
> > index
> > d10c3eac1e2e6c477cbab026942a45b4fcc2ddce..cf488c7ea9bfc423fd3de9d2c743f71175804ae2
> > 100644
> > --- a/gcc/value-query.h
> > +++ b/gcc/value-query.h
> > @@ -137,6 +137,16 @@ get_range_query (const struct function *fun)
> >     return (fun && fun->x_range_query) ? fun->x_range_query :
> >   &global_ranges;
> >   }
> >   
> > +/* Return true if there is range for "X" expression at "S" statement,
> > +   and the range is not varying and not undefined.   */
> > +
> > +inline bool
> > +get_range (vrange &r, tree x, gimple *s = NULL)
> > +{
> > +  return get_range_query (cfun)->range_of_expr (r, x, s) && !r.varying_p ()
> > +	 && !r.undefined_p ();
> > +}
> > +
> >   // Query the global range of NAME in function F.  Default to cfun.
> >   extern void gimple_range_global (vrange &v, tree name,
> >   				 struct function *f = cfun);
> > diff --git a/gcc/value-range.cc b/gcc/value-range.cc
> > index
> > 011bdbdeae62845b5627a8dfa0d261370df7e5db..5ae4e044194326c24aab6babf8e0df3e4b313e98
> > 100644
> > --- a/gcc/value-range.cc
> > +++ b/gcc/value-range.cc
> > @@ -313,6 +313,18 @@ add_vrange (const vrange &v, inchash::hash &hstate,
> >   
> >   } //namespace inchash
> >   
> > +bool
> > +irange::nonnegative_p () const
> > +{
> > +  return wi::ge_p (lower_bound (), 0, TYPE_SIGN (type ()));
> > +}
> > +
> > +bool
> > +irange::nonpositive_p () const
> > +{
> > +  return wi::le_p (upper_bound (), 0, TYPE_SIGN (type ()));
> > +}
> > +
> >   bool
> >   irange::supports_type_p (const_tree type) const
> >   {
> > diff --git a/gcc/value-range.h b/gcc/value-range.h
> > index
> > 0170188201bc9b1c4e117661c9a62819dc1547c5..a12dea514e48cdd04a3cee5dc6304637e501d399
> > 100644
> > --- a/gcc/value-range.h
> > +++ b/gcc/value-range.h
> > @@ -280,6 +280,8 @@ public:
> >     virtual bool singleton_p (tree *result = NULL) const override;
> >     bool singleton_p (wide_int &) const;
> >     bool contains_p (const wide_int &) const;
> > +  bool nonnegative_p () const;
> > +  bool nonpositive_p () const;
> >   
> >     // In-place operators.
> >     virtual bool union_ (const vrange &) override;
> > diff --git a/gcc/testsuite/gcc.dg/pr108757-1.c
> > b/gcc/testsuite/gcc.dg/pr108757-1.c
> > new file mode 100644
> > index
> > 0000000000000000000000000000000000000000..7908f4bdcb8e225fe311b668efbe8f6db525b4d5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/pr108757-1.c
> > @@ -0,0 +1,18 @@
> > +/* PR tree-optimization/108757 */
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +#include <limits.h>
> > +#define N 5
> > +#define M 3
> > +#define GAP 0
> > +typedef unsigned int UINT;
> > +typedef int INT;
> > +#define UMAX UINT_MAX
> > +#define IMAX INT_MAX
> > +#define IMIN INT_MIN
> > +#include "pr108757.h"
> > +
> > +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ " "optimized"
> > } } *
> > +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- " "optimized"
> > } } */
> > +/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } } */
> > diff --git a/gcc/testsuite/gcc.dg/pr108757-2.c
> > b/gcc/testsuite/gcc.dg/pr108757-2.c
> > new file mode 100644
> > index
> > 0000000000000000000000000000000000000000..82bebd09944f0be2e0690c604723bfe6182acec3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/pr108757-2.c
> > @@ -0,0 +1,19 @@
> > +/* PR tree-optimization/108757 */
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
> > +
> > +#include <limits.h>
> > +#define N 4
> > +#define M 3
> > +#define GAP 2
> > +typedef unsigned int UINT;
> > +typedef int INT;
> > +#define UMAX UINT_MAX
> > +#define IMAX INT_MAX
> > +#define IMIN INT_MIN
> > +#include "pr108757.h"
> > +
> > +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16
> > "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3
> > "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3 "optimized"
> > } } */
> > +
> > diff --git a/gcc/testsuite/gcc.dg/pr108757.h
> > b/gcc/testsuite/gcc.dg/pr108757.h
> > new file mode 100644
> > index
> > 0000000000000000000000000000000000000000..5550199c1ef952f54eaa83087cec25e992057c34
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/pr108757.h
> > @@ -0,0 +1,233 @@
> > +#define NOINLINE __attribute__ ((noinline))
> > +UINT NOINLINE
> > +opt_u1 (UINT x)
> > +{
> > +  if (x < (M * N) - GAP)
> > +    return 0;
> > +  UINT a = x - (M * N);
> > +  UINT b = a / N;
> > +  return b + M;
> > +}
> > +
> > +UINT NOINLINE
> > +opt_u2 (UINT x)
> > +{
> > +  if (x > (UMAX - (M * N) + GAP))
> > +    return 0;
> > +  UINT a = x + (M * N);
> > +  UINT b = a / N;
> > +  return b - M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s1 (INT x)
> > +{
> > +  if (x < (M * N) - GAP)
> > +    return 0;
> > +  INT a = x - (M * N);
> > +  INT b = a / N;
> > +  return b + M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s2 (INT x)
> > +{
> > +  if (x < IMIN + (M * N) - GAP || x > 0)
> > +    return 0;
> > +  INT a = x - (M * N);
> > +  INT b = a / N;
> > +  return b + M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s3 (INT x)
> > +{
> > +  if (x < (M * N) - GAP)
> > +    return 0;
> > +  INT a = x - (M * N);
> > +  INT b = a / -N;
> > +  return b + -M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s4 (INT x)
> > +{
> > +  if (x < IMIN + (M * N) - GAP || x > 0)
> > +    return 0;
> > +  INT a = x - (M * N);
> > +  INT b = a / -N;
> > +  return b + -M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s5 (INT x)
> > +{
> > +  if (x > (-M * N) + GAP)
> > +    return 0;
> > +  INT a = x - (-M * N);
> > +  INT b = a / N;
> > +  return b + -M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s6 (INT x)
> > +{
> > +  if (x > IMAX - (M * N) + GAP || x < 0)
> > +    return 0;
> > +  INT a = x - (-M * N);
> > +  INT b = a / N;
> > +  return b + -M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s7 (INT x)
> > +{
> > +  if (x > (M * -N) + GAP)
> > +    return 0;
> > +  INT a = x - (M * -N);
> > +  INT b = a / -N;
> > +  return b + M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s8 (INT x)
> > +{
> > +  if (x > IMAX - (M * N) + GAP || x < 0)
> > +    return 0;
> > +  INT a = x - (M * -N);
> > +  INT b = a / -N;
> > +  return b + M;
> > +}
> > +
> > +UINT NOINLINE
> > +opt_u3 (UINT x)
> > +{
> > +  if (x < (M << N) - GAP)
> > +    return 0;
> > +  UINT a = x - (M << N);
> > +  UINT b = a >> N;
> > +  return b + M;
> > +}
> > +
> > +UINT NOINLINE
> > +opt_u4 (UINT x)
> > +{
> > +  if (x > (UMAX - (M << N)) + GAP)
> > +    return 0;
> > +  UINT a = x + (M << N);
> > +  UINT b = a >> N;
> > +  return b - M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s9 (INT x)
> > +{
> > +  if (x < (M << N) - GAP)
> > +    return 0;
> > +  INT a = x - (M << N);
> > +  INT b = a >> N;
> > +  return b + M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s10 (INT x)
> > +{
> > +  if (x < IMIN + (M << N) - GAP || x > 0)
> > +    return 0;
> > +  INT a = x - (M << N);
> > +  INT b = a >> N;
> > +  return b + M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s11 (INT x)
> > +{
> > +  if (x > (-M << N) + GAP)
> > +    return 0;
> > +  INT a = x - (-M << N);
> > +  INT b = a >> N;
> > +  return b + -M;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s12 (INT x)
> > +{
> > +  if (x > IMAX - (M << N) + GAP || x < 0)
> > +    return 0;
> > +  INT a = x - (-M << N);
> > +  INT b = a >> N;
> > +  return b + -M;
> > +}
> > +
> > +UINT NOINLINE
> > +opt_u5 (UINT x, UINT n, UINT m)
> > +{
> > +  if (n > N || m > M)
> > +    return 0;
> > +  if (x < (M*N) - GAP)
> > +    return 0;
> > +  UINT a = x - (m * n);
> > +  UINT b = a / n;
> > +  return b + m;
> > +}
> > +
> > +UINT NOINLINE
> > +opt_u6 (UINT x, UINT n, UINT m)
> > +{
> > +  if (n > N || m > M)
> > +    return 0;
> > +  if (x > (UMAX - M*N) + GAP)
> > +    return 0;
> > +  UINT a = x + (m * n);
> > +  UINT b = a / n;
> > +  return b - m;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s13 (INT x, INT n, INT m)
> > +{
> > +  if (n > N || m > M || n < 0 || m < 0)
> > +    return 0;
> > +  if (x < (M*N) - GAP)
> > +    return 0;
> > +  INT a = x - (m * n);
> > +  INT b = a / n;
> > +  return b + m;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s14 (INT x, INT n, INT m)
> > +{
> > +  if (n > N || m > M || n < 0 || m < 0)
> > +    return 0;
> > +  if (x > -M*N + GAP)
> > +    return 0;
> > +  INT a = x + (m * n);
> > +  INT b = a / n;
> > +  return b - m;
> > +}
> > +
> > +INT
> > +opt_s15 (INT x, INT n, INT m)
> > +{
> > +  if (n > 0 || m > 0 || n < -N || m < -M)
> > +    return 0;
> > +  if (x < (M*N) - GAP)
> > +    return 0;
> > +  INT a = x - (m * n);
> > +  INT b = a / n;
> > +  return b + m;
> > +}
> > +
> > +INT NOINLINE
> > +opt_s16 (INT x, INT n, INT m)
> > +{
> > +  if (n > 0 || m > 0 || n < -N || m < -M)
> > +    return 0;
> > +  if (x < 0 || x > (IMAX - M*N) + GAP)
> > +    return 0;
> > +  INT a = x + (m * n);
> > +  INT b = a / n;
> > +  return b - m;
> > +}
> > +
> 
> 
>
Aldy Hernandez July 14, 2023, 2:12 p.m. UTC | #3
On 7/14/23 15:37, Richard Biener wrote:
> On Fri, 14 Jul 2023, Aldy Hernandez wrote:
> 
>> I don't know what you're trying to accomplish here, as I haven't been
>> following the PR, but adding all these helper functions to the ranger header
>> file seems wrong, especially since there's only one use of them. I see you're
>> tweaking the irange API, adding helper functions to range-op (which is only
>> for code dealing with implementing range operators for tree codes), etc etc.
>>
>> If you need these helper functions, I suggest you put them closer to their
>> uses (i.e. wherever the match.pd support machinery goes).
> 
> Note I suggested the opposite beacuse I thought these kind of helpers
> are closer to value-range support than to match.pd.

Oh sorry, I missed that.

> 
> But I take away from your answer that there's nothing close in the
> value-range machinery that answers the question whether A op B may
> overflow?

Not currently.

I vaguely recall we talked about some mechanism for doing range 
operations in a wider precision and comparing them with the result of 
doing it in the natural precision, and if the results differ, it must 
have overflowed.

*hunts down PR*

Comment 23 here:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100499#c23

Would something like that work?

I would prefer something more general, rather than having to re-invent 
every range-op entry to check for overflow.

Aldy
Andrew MacLeod July 14, 2023, 9 p.m. UTC | #4
On 7/14/23 09:37, Richard Biener wrote:
> On Fri, 14 Jul 2023, Aldy Hernandez wrote:
>
>> I don't know what you're trying to accomplish here, as I haven't been
>> following the PR, but adding all these helper functions to the ranger header
>> file seems wrong, especially since there's only one use of them. I see you're
>> tweaking the irange API, adding helper functions to range-op (which is only
>> for code dealing with implementing range operators for tree codes), etc etc.
>>
>> If you need these helper functions, I suggest you put them closer to their
>> uses (i.e. wherever the match.pd support machinery goes).
> Note I suggested the opposite beacuse I thought these kind of helpers
> are closer to value-range support than to match.pd.


probably vr-values.{cc.h} and  the simply_using_ranges paradigm would be 
the most sensible place to put these kinds of auxiliary routines?


>
> But I take away from your answer that there's nothing close in the
> value-range machinery that answers the question whether A op B may
> overflow?

we dont track it in ranges themselves.   During calculation of a range 
we obviously know, but propagating that generally when we rarely care 
doesn't seem worthwhile.  The very first generation of irange 6 years 
ago had an overflow_p() flag, but it was removed as not being worth 
keeping.     easier to simply ask the question when it matters

As the routines show, it pretty easy to figure out when the need arises 
so I think that should suffice.  At least for now,

Should we decide we would like it in general, it wouldnt be hard to add 
to irange.  wi_fold() cuurently returns null, it could easily return a 
bool indicating if an overflow happened, and wi_fold_in_parts and 
fold_range would simply OR the results all together of the compoent 
wi_fold() calls.  It would require updating/audfiting  a number of 
range-op entries and adding an overflowed_p()  query to irange.

Andrew
Jiufu Guo July 17, 2023, 6:27 a.m. UTC | #5
Hi Andrew, Aldy and Richard,

Thanks a lot for all your very helpful comments!

Andrew MacLeod <amacleod@redhat.com> writes:

> On 7/14/23 09:37, Richard Biener wrote:
>> On Fri, 14 Jul 2023, Aldy Hernandez wrote:
>>
>>> I don't know what you're trying to accomplish here, as I haven't been
>>> following the PR, but adding all these helper functions to the ranger header
>>> file seems wrong, especially since there's only one use of them. I see you're
>>> tweaking the irange API, adding helper functions to range-op (which is only
>>> for code dealing with implementing range operators for tree codes), etc etc.
>>>
>>> If you need these helper functions, I suggest you put them closer to their
>>> uses (i.e. wherever the match.pd support machinery goes).
>> Note I suggested the opposite beacuse I thought these kind of helpers
>> are closer to value-range support than to match.pd.
>
>
> probably vr-values.{cc.h} and  the simply_using_ranges paradigm would
> be the most sensible place to put these kinds of auxiliary routines?

Thanks! Richard also mentioned this as an example of using VR APIs.
I did not use vr-values.h/cc just because it seems vr-values are not
used/included in match.pd directly yet.

>
>
>>
>> But I take away from your answer that there's nothing close in the
>> value-range machinery that answers the question whether A op B may
>> overflow?
>
> we dont track it in ranges themselves.   During calculation of a range
> we obviously know, but propagating that generally when we rarely care
> doesn't seem worthwhile.  The very first generation of irange 6 years
> ago had an overflow_p() flag, but it was removed as not being worth
> keeping.     easier to simply ask the question when it matters

Right, agree!

>
> As the routines show, it pretty easy to figure out when the need arises so I think that should suffice.  At least for now,
>
> Should we decide we would like it in general, it wouldnt be hard to
> add to irange.  wi_fold() cuurently returns null, it could easily
> return a bool indicating if an overflow happened, and wi_fold_in_parts
> and fold_range would simply OR the results all together of the
> compoent wi_fold() calls.  It would require updating/audfiting  a
> number of range-op entries and adding an overflowed_p()  query to
> irange.

I also tried to add a 'm_ovf' field to irange and record the
overflow info during 'wi_fold' in range-op(e.g. plus/minus/mult).
But, as you said, overflow info is not widely used (it seems that 
match.pd covers most of the cases which are using overflow info).
It may not be worth adding a field to every instance of VR, and 
additional cost may not be profitable to maintain it during VR
assign/union_/intersect.
I've attached a patch for this idea, just for reference.

Currently, what I'm trying to do is find a better place to add
APIs to check the overflow info for match.pd.

vr-range.h/cc would be one choice :)
I noticed Aldy mentioned 'may_overflow_p' in a comment of PR100499,
where this API was trying to add? This may be another choice.

Thanks a gain for your great comments!

BR,
Jeff (Jiufu Guo)

>
> Andrew

gcc/ChangeLog:

	* range-op.cc (value_range_with_overflow): Call set_overflow.
	(operator_mult::wi_fold): Call set_overflow.
	* value-query.h (get_range): New function.
	* value-range-storage.cc (irange_storage::set_irange): Set
	ovf info.
	(irange_storage::get_irange): Call set_overflow.
	* value-range-storage.h (irange_storage): Add m_ovf field.
	* value-range.cc (irange::nonnegative_p): New function.
	(irange::nonpositive_p): New function.
	(irange::operator=): Maintain ovf info.
	(irange::union_): Maintain ovf info.
	(irange::intersect): Maintain ovf info.
	* value-range.h (irange::irange): Initialize m_ovf.

---
 gcc/range-op.cc            | 17 +++++++++++++----
 gcc/value-query.h          | 10 ++++++++++
 gcc/value-range-storage.cc |  2 ++
 gcc/value-range-storage.h  |  1 +
 gcc/value-range.cc         | 29 ++++++++++++++++++++++++++---
 gcc/value-range.h          |  6 ++++++
 6 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 3ab2c665901..02971e8a16a 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -433,6 +433,10 @@ value_range_with_overflow (irange &r, tree type,
   const unsigned int prec = TYPE_PRECISION (type);
   const bool overflow_wraps = TYPE_OVERFLOW_WRAPS (type);
 
+  bool ovf = !TYPE_OVERFLOW_UNDEFINED (type)
+	     && (min_ovf != wi::OVF_NONE || max_ovf != wi::OVF_NONE);
+  r.set_overflow (ovf ? wi::OVF_UNKNOWN : wi::OVF_NONE);
+
   // For one bit precision if max != min, then the range covers all
   // values.
   if (prec == 1 && wi::ne_p (wmax, wmin))
@@ -2050,10 +2054,15 @@ operator_mult::wi_fold (irange &r, tree type,
 
   // Sort the 4 products so that min is in prod0 and max is in
   // prod3.
-  widest2_int prod0 = min0 * min1;
-  widest2_int prod1 = min0 * max1;
-  widest2_int prod2 = max0 * min1;
-  widest2_int prod3 = max0 * max1;
+  wi::overflow_type ovf1, ovf2, ovf3, ovf4;
+  widest2_int prod0 = wi::mul (min0, min1, sign, &ovf1);
+  widest2_int prod1 = wi::mul (min0, max1, sign, &ovf2);
+  widest2_int prod2 = wi::mul (max0, min1, sign, &ovf3);
+  widest2_int prod3 = wi::mul (max0, max1, sign, &ovf4);
+  bool ovf = !TYPE_OVERFLOW_UNDEFINED (type)
+	     && (ovf1 != wi::OVF_NONE || ovf2 != wi::OVF_NONE
+		 || ovf3 != wi::OVF_NONE || ovf3 != wi::OVF_NONE);
+  r.set_overflow (ovf ? wi::OVF_UNKNOWN : wi::OVF_NONE);
 
   // min0min1 > max0max1
   if (prod0 > prod3)
diff --git a/gcc/value-query.h b/gcc/value-query.h
index d10c3eac1e2..cf488c7ea9b 100644
--- a/gcc/value-query.h
+++ b/gcc/value-query.h
@@ -137,6 +137,16 @@ get_range_query (const struct function *fun)
   return (fun && fun->x_range_query) ? fun->x_range_query : &global_ranges;
 }
 
+/* Return true if there is range for "X" expression at "S" statement,
+   and the range is not varying and not undefined.   */
+
+inline bool
+get_range (vrange &r, tree x, gimple *s = NULL)
+{
+  return get_range_query (cfun)->range_of_expr (r, x, s) && !r.varying_p ()
+	 && !r.undefined_p ();
+}
+
 // Query the global range of NAME in function F.  Default to cfun.
 extern void gimple_range_global (vrange &v, tree name,
 				 struct function *f = cfun);
diff --git a/gcc/value-range-storage.cc b/gcc/value-range-storage.cc
index 2f82739680c..93bbae3c465 100644
--- a/gcc/value-range-storage.cc
+++ b/gcc/value-range-storage.cc
@@ -277,6 +277,7 @@ void
 irange_storage::set_irange (const irange &r)
 {
   gcc_checking_assert (fits_p (r));
+  m_ovf = r.m_ovf;
 
   if (r.undefined_p ())
     {
@@ -325,6 +326,7 @@ read_wide_int (wide_int &w,
 void
 irange_storage::get_irange (irange &r, tree type) const
 {
+  r.set_overflow (m_ovf);
   if (m_kind == VR_UNDEFINED)
     {
       r.set_undefined ();
diff --git a/gcc/value-range-storage.h b/gcc/value-range-storage.h
index 99fb815cdc2..62a17d73e9f 100644
--- a/gcc/value-range-storage.h
+++ b/gcc/value-range-storage.h
@@ -90,6 +90,7 @@ private:
   unsigned char m_num_ranges;
 
   enum value_range_kind m_kind : 3;
+  wi::overflow_type m_ovf;
 
   // The length of this is m_num_ranges * 2 + 1 to accomodate the nonzero bits.
   HOST_WIDE_INT m_val[1];
diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 707b1f15fd4..58aed715658 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -287,6 +287,18 @@ add_vrange (const vrange &v, inchash::hash &hstate,
 
 } //namespace inchash
 
+bool
+irange::nonnegative_p () const
+{
+  return wi::ge_p (lower_bound (), 0, TYPE_SIGN (type ()));
+}
+
+bool
+irange::nonpositive_p () const
+{
+  return wi::le_p (upper_bound (), 0, TYPE_SIGN (type ()));
+}
+
 bool
 irange::supports_type_p (const_tree type) const
 {
@@ -916,6 +928,7 @@ irange::operator= (const irange &src)
   m_type = src.m_type;
   m_kind = src.m_kind;
   m_nonzero_mask = src.m_nonzero_mask;
+  m_ovf = src.m_ovf;
   if (m_max_ranges == 1)
     normalize_kind ();
   if (flag_checking)
@@ -1244,7 +1257,15 @@ irange::union_ (const vrange &v)
     }
 
   if (varying_p ())
-    return false;
+    {
+      if (m_ovf == r.m_ovf || m_ovf == wi::OVF_UNKNOWN)
+	return false;
+      m_ovf = wi::OVF_UNKNOWN;
+      return true;
+    }
+
+  if (m_ovf != r.m_ovf)
+    m_ovf = wi::OVF_UNKNOWN;
 
   if (r.varying_p ())
     {
@@ -1417,14 +1438,16 @@ irange::intersect (const vrange &v)
       set_undefined ();
       return true;
     }
-  if (r.varying_p ())
-    return false;
+
   if (varying_p ())
     {
       operator= (r);
       return true;
     }
 
+  if (r.varying_p ())
+    return false;
+
   if (r.num_pairs () == 1)
     {
       bool res = intersect (r.lower_bound (), r.upper_bound ());
diff --git a/gcc/value-range.h b/gcc/value-range.h
index 2b4ebabe7c8..ae35f6fbcf7 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -145,6 +145,10 @@ public:
   virtual bool singleton_p (tree *result = NULL) const override;
   bool singleton_p (wide_int &) const;
   bool contains_p (const wide_int &) const;
+  bool nonnegative_p () const;
+  bool nonpositive_p () const;
+  bool no_overflow () const { return m_ovf == wi::OVF_NONE; }
+  void set_overflow (wi::overflow_type ovf) { m_ovf = ovf;}
 
   // In-place operators.
   virtual bool union_ (const vrange &) override;
@@ -197,6 +201,7 @@ private:
   unsigned char m_max_ranges;
   tree m_type;
   wide_int m_nonzero_mask;
+  wi::overflow_type m_ovf;
 protected:
   wide_int *m_base;
 };
@@ -839,6 +844,7 @@ irange::irange (wide_int *base, unsigned nranges, bool resizable)
     m_max_ranges (nranges)
 {
   m_base = base;
+  m_ovf = wi::OVF_UNKNOWN;
   set_undefined ();
 }
Richard Biener July 17, 2023, 9:24 a.m. UTC | #6
On Fri, 14 Jul 2023, Andrew MacLeod wrote:

> 
> On 7/14/23 09:37, Richard Biener wrote:
> > On Fri, 14 Jul 2023, Aldy Hernandez wrote:
> >
> >> I don't know what you're trying to accomplish here, as I haven't been
> >> following the PR, but adding all these helper functions to the ranger
> >> header
> >> file seems wrong, especially since there's only one use of them. I see
> >> you're
> >> tweaking the irange API, adding helper functions to range-op (which is only
> >> for code dealing with implementing range operators for tree codes), etc
> >> etc.
> >>
> >> If you need these helper functions, I suggest you put them closer to their
> >> uses (i.e. wherever the match.pd support machinery goes).
> > Note I suggested the opposite beacuse I thought these kind of helpers
> > are closer to value-range support than to match.pd.
> 
> 
> probably vr-values.{cc.h} and  the simply_using_ranges paradigm would be the
> most sensible place to put these kinds of auxiliary routines?
> 
> 
> >
> > But I take away from your answer that there's nothing close in the
> > value-range machinery that answers the question whether A op B may
> > overflow?
> 
> we dont track it in ranges themselves.   During calculation of a range we
> obviously know, but propagating that generally when we rarely care doesn't
> seem worthwhile.  The very first generation of irange 6 years ago had an
> overflow_p() flag, but it was removed as not being worth keeping.     easier
> to simply ask the question when it matters
> 
> As the routines show, it pretty easy to figure out when the need arises so I
> think that should suffice.  At least for now,
> 
> Should we decide we would like it in general, it wouldnt be hard to add to
> irange.  wi_fold() cuurently returns null, it could easily return a bool
> indicating if an overflow happened, and wi_fold_in_parts and fold_range would
> simply OR the results all together of the compoent wi_fold() calls.  It would
> require updating/audfiting  a number of range-op entries and adding an
> overflowed_p()  query to irange.

Ah, yeah - the folding APIs would be a good fit I guess.  I was
also looking to have the "new" helpers to be somewhat consistent
with the ranger API.

So if we had a fold_range overload with either an output argument
or a flag that makes it return false on possible overflow that
would work I guess?  Since we have a virtual class setup we
might be able to provide a default failing method and implement
workers for plus and mult (as needed for this patch) as the need
arises?

Thanks,
Richard.
Jiufu Guo July 17, 2023, 1:45 p.m. UTC | #7
Hi,

Richard Biener <rguenther@suse.de> writes:

> On Fri, 14 Jul 2023, Andrew MacLeod wrote:
>
>> 
>> On 7/14/23 09:37, Richard Biener wrote:
>> > On Fri, 14 Jul 2023, Aldy Hernandez wrote:
>> >
>> >> I don't know what you're trying to accomplish here, as I haven't been
>> >> following the PR, but adding all these helper functions to the ranger
>> >> header
>> >> file seems wrong, especially since there's only one use of them. I see
>> >> you're
>> >> tweaking the irange API, adding helper functions to range-op (which is only
>> >> for code dealing with implementing range operators for tree codes), etc
>> >> etc.
>> >>
>> >> If you need these helper functions, I suggest you put them closer to their
>> >> uses (i.e. wherever the match.pd support machinery goes).
>> > Note I suggested the opposite beacuse I thought these kind of helpers
>> > are closer to value-range support than to match.pd.
>> 
>> 
>> probably vr-values.{cc.h} and  the simply_using_ranges paradigm would be the
>> most sensible place to put these kinds of auxiliary routines?
>> 
>> 
>> >
>> > But I take away from your answer that there's nothing close in the
>> > value-range machinery that answers the question whether A op B may
>> > overflow?
>> 
>> we dont track it in ranges themselves.   During calculation of a range we
>> obviously know, but propagating that generally when we rarely care doesn't
>> seem worthwhile.  The very first generation of irange 6 years ago had an
>> overflow_p() flag, but it was removed as not being worth keeping.     easier
>> to simply ask the question when it matters
>> 
>> As the routines show, it pretty easy to figure out when the need arises so I
>> think that should suffice.  At least for now,
>> 
>> Should we decide we would like it in general, it wouldnt be hard to add to
>> irange.  wi_fold() cuurently returns null, it could easily return a bool
>> indicating if an overflow happened, and wi_fold_in_parts and fold_range would
>> simply OR the results all together of the compoent wi_fold() calls.  It would
>> require updating/audfiting  a number of range-op entries and adding an
>> overflowed_p()  query to irange.
>
> Ah, yeah - the folding APIs would be a good fit I guess.  I was
> also looking to have the "new" helpers to be somewhat consistent
> with the ranger API.
>
> So if we had a fold_range overload with either an output argument
> or a flag that makes it return false on possible overflow that
> would work I guess?  Since we have a virtual class setup we
> might be able to provide a default failing method and implement
> workers for plus and mult (as needed for this patch) as the need
> arises?

Thanks for your comments!
Here is a concern.  The patterns in match.pd may be supported by
'vrp' passes. At that time, the range info would be computed (via
the value-range machinery) and cached for each SSA_NAME. In the
patterns, when range_of_expr is called for a capture, the range
info is retrieved from the cache, and no need to fold_range again.
This means the overflow info may also need to be cached together
with other range info.  There may be additional memory and time
cost.

BR,
Jeff (Jiufu Guo)

>
> Thanks,
> Richard.
Andrew MacLeod July 17, 2023, 5:24 p.m. UTC | #8
On 7/17/23 09:45, Jiufu Guo wrote:
>
>>> Should we decide we would like it in general, it wouldnt be hard to add to
>>> irange.  wi_fold() cuurently returns null, it could easily return a bool
>>> indicating if an overflow happened, and wi_fold_in_parts and fold_range would
>>> simply OR the results all together of the compoent wi_fold() calls.  It would
>>> require updating/audfiting  a number of range-op entries and adding an
>>> overflowed_p()  query to irange.
>> Ah, yeah - the folding APIs would be a good fit I guess.  I was
>> also looking to have the "new" helpers to be somewhat consistent
>> with the ranger API.
>>
>> So if we had a fold_range overload with either an output argument
>> or a flag that makes it return false on possible overflow that
>> would work I guess?  Since we have a virtual class setup we
>> might be able to provide a default failing method and implement
>> workers for plus and mult (as needed for this patch) as the need
>> arises?
> Thanks for your comments!
> Here is a concern.  The patterns in match.pd may be supported by
> 'vrp' passes. At that time, the range info would be computed (via
> the value-range machinery) and cached for each SSA_NAME. In the
> patterns, when range_of_expr is called for a capture, the range
> info is retrieved from the cache, and no need to fold_range again.
> This means the overflow info may also need to be cached together
> with other range info.  There may be additional memory and time
> cost.
>

I've been thinking about this a little bit, and how to make the info 
available in a useful way.

I wonder if maybe we just add another entry point  to range-ops that 
looks a bit like fold_range ..

   Attached is an (untested) patch which ads overflow_free_p(op1, op2, 
relation)  to rangeops.   It defaults to returning false.  If you want 
to implement it for say plus,  you'd add to operator_plus in 
range-ops.cc  something like

operator_plus::overflow_free_p (irange&op1, irange& op2, relation_kind)
{
    // stuff you do in plus_without_overflow
}

I added relation_kind as  param, but you can ignore it.  maybe it wont 
ever help, but it seems like if we know there is a relation between op1 
and op2 we might be able to someday determine something else?     if 
not, remove it.

Then all you need to do too access it is to go thru range-op_handler.. 
so for instance:

range_op_handler (PLUS_EXPR).overflow_free_p (op1, op2)

It'll work for all types an all tree codes. the dispatch machinery will 
return false unless both op1 and op2 are integral ranges, and then it 
will invoke the appropriate handler, defaulting to returning FALSE.

I also am not a fan of the get_range  routine.  It would be better to 
generally just call range_of_expr, get the results, then handle 
undefined in the new overflow_free_p() routine and return false.  
varying should not need anything special since it will trigger the 
overflow when you do the calculation.

The auxillary routines could go in vr-values.h/cc.  They seem like 
things that simplify_using_ranges could utilize, and when we get to 
integrating simplify_using_ranges better,  what you are doing may end up 
there anyway

Does that work?

Andrew
Jiufu Guo July 18, 2023, 9:44 a.m. UTC | #9
Hi,

Andrew MacLeod <amacleod@redhat.com> writes:

> On 7/17/23 09:45, Jiufu Guo wrote:
>>
>>>> Should we decide we would like it in general, it wouldnt be hard to add to
>>>> irange.  wi_fold() cuurently returns null, it could easily return a bool
>>>> indicating if an overflow happened, and wi_fold_in_parts and fold_range would
>>>> simply OR the results all together of the compoent wi_fold() calls.  It would
>>>> require updating/audfiting  a number of range-op entries and adding an
>>>> overflowed_p()  query to irange.
>>> Ah, yeah - the folding APIs would be a good fit I guess.  I was
>>> also looking to have the "new" helpers to be somewhat consistent
>>> with the ranger API.
>>>
>>> So if we had a fold_range overload with either an output argument
>>> or a flag that makes it return false on possible overflow that
>>> would work I guess?  Since we have a virtual class setup we
>>> might be able to provide a default failing method and implement
>>> workers for plus and mult (as needed for this patch) as the need
>>> arises?
>> Thanks for your comments!
>> Here is a concern.  The patterns in match.pd may be supported by
>> 'vrp' passes. At that time, the range info would be computed (via
>> the value-range machinery) and cached for each SSA_NAME. In the
>> patterns, when range_of_expr is called for a capture, the range
>> info is retrieved from the cache, and no need to fold_range again.
>> This means the overflow info may also need to be cached together
>> with other range info.  There may be additional memory and time
>> cost.
>>
>
> I've been thinking about this a little bit, and how to make the info available in a useful way.
>
> I wonder if maybe we just add another entry point  to range-ops that looks a bit like fold_range ..
>
>   Attached is an (untested) patch which ads overflow_free_p(op1, op2,
> relation)  to rangeops.   It defaults to returning false.  If you want
> to implement it for say plus,  you'd add to operator_plus in
> range-ops.cc  something like
>
> operator_plus::overflow_free_p (irange&op1, irange& op2, relation_kind)
> {
>    // stuff you do in plus_without_overflow
> }
>
> I added relation_kind as  param, but you can ignore it.  maybe it wont
> ever help, but it seems like if we know there is a relation between
> op1 and op2 we might be able to someday determine something else?    
> if not, remove it.
>
> Then all you need to do too access it is to go thru range-op_handler.. so for instance:
>
> range_op_handler (PLUS_EXPR).overflow_free_p (op1, op2)
>
> It'll work for all types an all tree codes. the dispatch machinery
> will return false unless both op1 and op2 are integral ranges, and
> then it will invoke the appropriate handler, defaulting to returning
> FALSE.

Very good suggestions! Thanks so much for your great guide!

>
> I also am not a fan of the get_range  routine.  It would be better to
> generally just call range_of_expr, get the results, then handle
> undefined in the new overflow_free_p() routine and return false. 
> varying should not need anything special since it will trigger the
> overflow when you do the calculation.

The general code in the trunk is just like you said: range_of_expr is
used when querying a range for an expr.
I am also aware that: a range with varying([min, max]) may be ok if the
range is computed from other ranges, especially if there is no overflow.
For example, '[MAX-100, MAX] - [0, 100]' generates a varying range, but
it would be ok for some case.
And a varying range will trigger overflow if it takes part in a
calculation as your said.
So, I agree that varying would not be specially for some patterns.

>
> The auxillary routines could go in vr-values.h/cc.  They seem like
> things that simplify_using_ranges could utilize, and when we get to
> integrating simplify_using_ranges better,  what you are doing may end
> up there anyway

Thanks for your suggestion!  Or maybe we could just use the APIs 
in match.pd directly.

>
> Does that work?

I believe this would work!
I will submit a new version patch!  Thanks again for your comments!

BR,
Jeff (Jiufu Guo)

>
> Andrew
diff mbox series

Patch

diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
index 01e62d3ff3901143bde33dc73c0debf41d0c0fdd..620fe32e85e5fe3847a933554fc656b2939cf02d 100644
--- a/gcc/gimple-range.cc
+++ b/gcc/gimple-range.cc
@@ -926,3 +926,53 @@  assume_query::dump (FILE *f)
     }
   fprintf (f, "------------------------------\n");
 }
+
+/* Return true if the operation "X CODE Y" in type does not overflow
+   underflow or wrap with value range info, otherwise return false.  */
+
+bool
+arith_without_overflow_p (tree_code code, tree x, tree y, tree type)
+{
+  gcc_assert (INTEGRAL_TYPE_P (type));
+
+  if (TYPE_OVERFLOW_UNDEFINED (type))
+    return true;
+
+  value_range vr0;
+  value_range vr1;
+  if (!(get_range (vr0, x) && get_range (vr1, y)))
+    return false;
+
+  switch (code)
+    {
+    case PLUS_EXPR:
+      return plus_without_overflow_p (vr0, vr1, type);
+    case MINUS_EXPR:
+      return minus_without_overflow_p (vr0, vr1, type);
+    case MULT_EXPR:
+      return mult_without_overflow_p (vr0, vr1, type);
+    default:
+      gcc_unreachable ();
+    }
+
+  return false;
+}
+
+/* Return true if "X" and "Y" have the same sign or zero.  */
+
+bool
+same_sign_p (tree x, tree y, tree type)
+{
+  gcc_assert (INTEGRAL_TYPE_P (type));
+
+  if (TYPE_UNSIGNED (type))
+    return true;
+
+  value_range vr0;
+  value_range vr1;
+  if (!(get_range (vr0, x) && get_range (vr1, y)))
+    return false;
+
+  return (vr0.nonnegative_p () && vr1.nonnegative_p ())
+	 || (vr0.nonpositive_p () && vr1.nonpositive_p ());
+}
diff --git a/gcc/gimple-range.h b/gcc/gimple-range.h
index 6587e4923ff44e10826a697ecced237a0ad23c88..84eac87392b642ed3305011415c804f5b319e09f 100644
--- a/gcc/gimple-range.h
+++ b/gcc/gimple-range.h
@@ -101,5 +101,7 @@  protected:
   gori_compute m_gori;
 };
 
+bool arith_without_overflow_p (tree_code code, tree x, tree y, tree type);
+bool same_sign_p (tree x, tree y, tree type);
 
 #endif // GCC_GIMPLE_RANGE_H
diff --git a/gcc/match.pd b/gcc/match.pd
index 8543f777a28e4f39b2b2a40d0702aed88786bbb3..87e990c5b1ebbd116d7d7efdba62347d3a967cdd 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -942,6 +942,70 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 #endif
    ))))
 
+#if GIMPLE
+(for div (trunc_div exact_div)
+ /* Simplify (t + M*N) / N -> t / N + M.  */
+ (simplify
+  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
+  (if (INTEGRAL_TYPE_P (type)
+       && arith_without_overflow_p (MULT_EXPR, @1, @2, type)
+       && arith_without_overflow_p (PLUS_EXPR, @0, @3, type)
+       && same_sign_p (@0, @4, type))
+  (plus (div @0 @2) @1)))
+
+ /* Simplify (t - M*N) / N -> t / N - M.  */
+ (simplify
+  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
+  (if (INTEGRAL_TYPE_P (type)
+       && arith_without_overflow_p (MULT_EXPR, @1, @2, type)
+       && arith_without_overflow_p (MINUS_EXPR, @0, @3, type)
+       && same_sign_p (@0, @4, type))
+  (minus (div @0 @2) @1))))
+
+/* Simplify
+   (t + C) / N -> t / N + C / N where C is multiple of N.
+   (t + C) >> N -> t >> N + C>>N if low N bits of C is 0.  */
+(for op (trunc_div exact_div rshift)
+ (simplify
+  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
+   (with
+    {
+      wide_int c = wi::to_wide (@1);
+      wide_int n = wi::to_wide (@2);
+      bool is_rshift = op == RSHIFT_EXPR;
+      bool neg_c = false;
+      bool ok = false;
+      if (INTEGRAL_TYPE_P (type))
+        {
+	  ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
+			 : wi::multiple_of_p (c, n, TYPE_SIGN (type));
+	  ok = ok && arith_without_overflow_p (PLUS_EXPR, @0, @1, type)
+	       && same_sign_p (@0, @3, type);
+
+	  /* Try check 'X + C' as 'X - -C' for unsigned.  */
+	  if (!ok && TYPE_UNSIGNED (type) && c.sign_mask () < 0)
+	    {
+	      neg_c = true;
+	      c = -c;
+	      ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
+			     : wi::multiple_of_p (c, n, UNSIGNED);
+	      value_range vr;
+	      ok = ok && get_range (vr, @0)
+		   && wi::geu_p (vr.lower_bound (), c);
+	    }
+	}
+    }
+   (if (ok)
+   (with
+    {
+      wide_int m;
+      m = is_rshift ? wi::rshift (c, n, TYPE_SIGN (type))
+		    : wi::div_trunc (c, n, TYPE_SIGN (type));
+      m = neg_c ? -m : m;
+    }
+   (plus (op @0 @2) { wide_int_to_tree(type, m); }))))))
+#endif
+
 (for op (negate abs)
  /* Simplify cos(-x) and cos(|x|) -> cos(x).  Similarly for cosh.  */
  (for coss (COS COSH)
diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index cb584314f4cfc93aeec50c7f888829997a7dc8eb..487e781a0cdfa39416589983a246327957c14d54 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -4311,6 +4311,83 @@  range_op_table::initialize_integral_ops ()
 
 }
 
+bool
+plus_without_overflow_p (value_range &vr0, value_range &vr1, tree type)
+{
+  wi::overflow_type ovf;
+  signop sgn = TYPE_SIGN (type);
+  wide_int wmax0 = vr0.upper_bound ();
+  wide_int wmax1 = vr1.upper_bound ();
+  wi::add (wmax0, wmax1, sgn, &ovf);
+  if (ovf != wi::OVF_NONE)
+    return false;
+
+  if (TYPE_UNSIGNED (type))
+    return true;
+
+  wide_int wmin0 = vr0.lower_bound ();
+  wide_int wmin1 = vr1.lower_bound ();
+  wi::add (wmin0, wmin1, sgn, &ovf);
+  if (ovf != wi::OVF_NONE)
+    return false;
+
+  return true;
+}
+
+bool
+minus_without_overflow_p (value_range &vr0, value_range &vr1, tree type)
+{
+  wi::overflow_type ovf;
+  signop sgn = TYPE_SIGN (type);
+  wide_int wmin0 = vr0.lower_bound ();
+  wide_int wmax1 = vr1.upper_bound ();
+  wi::sub (wmin0, wmax1, sgn, &ovf);
+  if (ovf != wi::OVF_NONE)
+    return false;
+
+  if (TYPE_UNSIGNED (type))
+    return true;
+
+  wide_int wmax0 = vr0.upper_bound ();
+  wide_int wmin1 = vr1.lower_bound ();
+  wi::sub (wmax0, wmin1, sgn, &ovf);
+  if (ovf != wi::OVF_NONE)
+    return false;
+
+  return true;
+}
+
+bool
+mult_without_overflow_p (value_range &vr0, value_range &vr1, tree type)
+{
+  wi::overflow_type ovf;
+  signop sgn = TYPE_SIGN (type);
+  wide_int wmax0 = vr0.upper_bound ();
+  wide_int wmax1 = vr1.upper_bound ();
+  wi::mul (wmax0, wmax1, sgn, &ovf);
+  if (ovf != wi::OVF_NONE)
+    return false;
+
+  if (TYPE_UNSIGNED (type))
+    return true;
+
+  wide_int wmin0 = vr0.lower_bound ();
+  wide_int wmin1 = vr1.lower_bound ();
+  wi::mul (wmin0, wmin1, sgn, &ovf);
+  if (ovf != wi::OVF_NONE)
+    return false;
+
+  wi::mul (wmin0, wmax1, sgn, &ovf);
+  if (ovf != wi::OVF_NONE)
+    return false;
+
+  wi::mul (wmax0, wmin1, sgn, &ovf);
+  if (ovf != wi::OVF_NONE)
+    return false;
+
+  return true;
+}
+
 #if CHECKING_P
 #include "selftest.h"
 
diff --git a/gcc/range-op.h b/gcc/range-op.h
index af94c2756a74f710aa50aec1ac3b3de5eeb43a8e..1697ce5464c0cc8fbfb86234137ba7cc23e10979 100644
--- a/gcc/range-op.h
+++ b/gcc/range-op.h
@@ -220,6 +220,10 @@  protected:
   range_operator *m_operator;
 };
 
+extern bool plus_without_overflow_p (value_range &, value_range &, tree);
+extern bool minus_without_overflow_p (value_range &, value_range &, tree);
+extern bool mult_without_overflow_p (value_range &, value_range &, tree);
+
 // Cast the range in R to TYPE if R supports TYPE.
 
 inline bool
diff --git a/gcc/value-query.h b/gcc/value-query.h
index d10c3eac1e2e6c477cbab026942a45b4fcc2ddce..cf488c7ea9bfc423fd3de9d2c743f71175804ae2 100644
--- a/gcc/value-query.h
+++ b/gcc/value-query.h
@@ -137,6 +137,16 @@  get_range_query (const struct function *fun)
   return (fun && fun->x_range_query) ? fun->x_range_query : &global_ranges;
 }
 
+/* Return true if there is range for "X" expression at "S" statement,
+   and the range is not varying and not undefined.   */
+
+inline bool
+get_range (vrange &r, tree x, gimple *s = NULL)
+{
+  return get_range_query (cfun)->range_of_expr (r, x, s) && !r.varying_p ()
+	 && !r.undefined_p ();
+}
+
 // Query the global range of NAME in function F.  Default to cfun.
 extern void gimple_range_global (vrange &v, tree name,
 				 struct function *f = cfun);
diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 011bdbdeae62845b5627a8dfa0d261370df7e5db..5ae4e044194326c24aab6babf8e0df3e4b313e98 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -313,6 +313,18 @@  add_vrange (const vrange &v, inchash::hash &hstate,
 
 } //namespace inchash
 
+bool
+irange::nonnegative_p () const
+{
+  return wi::ge_p (lower_bound (), 0, TYPE_SIGN (type ()));
+}
+
+bool
+irange::nonpositive_p () const
+{
+  return wi::le_p (upper_bound (), 0, TYPE_SIGN (type ()));
+}
+
 bool
 irange::supports_type_p (const_tree type) const
 {
diff --git a/gcc/value-range.h b/gcc/value-range.h
index 0170188201bc9b1c4e117661c9a62819dc1547c5..a12dea514e48cdd04a3cee5dc6304637e501d399 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -280,6 +280,8 @@  public:
   virtual bool singleton_p (tree *result = NULL) const override;
   bool singleton_p (wide_int &) const;
   bool contains_p (const wide_int &) const;
+  bool nonnegative_p () const;
+  bool nonpositive_p () const;
 
   // In-place operators.
   virtual bool union_ (const vrange &) override;
diff --git a/gcc/testsuite/gcc.dg/pr108757-1.c b/gcc/testsuite/gcc.dg/pr108757-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..7908f4bdcb8e225fe311b668efbe8f6db525b4d5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr108757-1.c
@@ -0,0 +1,18 @@ 
+/* PR tree-optimization/108757 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include <limits.h>
+#define N 5
+#define M 3
+#define GAP 0
+typedef unsigned int UINT;
+typedef int INT;
+#define UMAX UINT_MAX
+#define IMAX INT_MAX
+#define IMIN INT_MIN
+#include "pr108757.h"
+
+/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ " "optimized" } } *
+/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- " "optimized" } } */
+/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/pr108757-2.c b/gcc/testsuite/gcc.dg/pr108757-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..82bebd09944f0be2e0690c604723bfe6182acec3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr108757-2.c
@@ -0,0 +1,19 @@ 
+/* PR tree-optimization/108757 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
+
+#include <limits.h>
+#define N 4
+#define M 3
+#define GAP 2
+typedef unsigned int UINT;
+typedef int INT;
+#define UMAX UINT_MAX
+#define IMAX INT_MAX
+#define IMIN INT_MIN
+#include "pr108757.h"
+
+/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/pr108757.h b/gcc/testsuite/gcc.dg/pr108757.h
new file mode 100644
index 0000000000000000000000000000000000000000..5550199c1ef952f54eaa83087cec25e992057c34
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr108757.h
@@ -0,0 +1,233 @@ 
+#define NOINLINE __attribute__ ((noinline))
+UINT NOINLINE
+opt_u1 (UINT x)
+{
+  if (x < (M * N) - GAP)
+    return 0;
+  UINT a = x - (M * N);
+  UINT b = a / N;
+  return b + M;
+}
+
+UINT NOINLINE
+opt_u2 (UINT x)
+{
+  if (x > (UMAX - (M * N) + GAP))
+    return 0;
+  UINT a = x + (M * N);
+  UINT b = a / N;
+  return b - M;
+}
+
+INT NOINLINE
+opt_s1 (INT x)
+{
+  if (x < (M * N) - GAP)
+    return 0;
+  INT a = x - (M * N);
+  INT b = a / N;
+  return b + M;
+}
+
+INT NOINLINE
+opt_s2 (INT x)
+{
+  if (x < IMIN + (M * N) - GAP || x > 0)
+    return 0;
+  INT a = x - (M * N);
+  INT b = a / N;
+  return b + M;
+}
+
+INT NOINLINE
+opt_s3 (INT x)
+{
+  if (x < (M * N) - GAP)
+    return 0;
+  INT a = x - (M * N);
+  INT b = a / -N;
+  return b + -M;
+}
+
+INT NOINLINE
+opt_s4 (INT x)
+{
+  if (x < IMIN + (M * N) - GAP || x > 0)
+    return 0;
+  INT a = x - (M * N);
+  INT b = a / -N;
+  return b + -M;
+}
+
+INT NOINLINE
+opt_s5 (INT x)
+{
+  if (x > (-M * N) + GAP)
+    return 0;
+  INT a = x - (-M * N);
+  INT b = a / N;
+  return b + -M;
+}
+
+INT NOINLINE
+opt_s6 (INT x)
+{
+  if (x > IMAX - (M * N) + GAP || x < 0)
+    return 0;
+  INT a = x - (-M * N);
+  INT b = a / N;
+  return b + -M;
+}
+
+INT NOINLINE
+opt_s7 (INT x)
+{
+  if (x > (M * -N) + GAP)
+    return 0;
+  INT a = x - (M * -N);
+  INT b = a / -N;
+  return b + M;
+}
+
+INT NOINLINE
+opt_s8 (INT x)
+{
+  if (x > IMAX - (M * N) + GAP || x < 0)
+    return 0;
+  INT a = x - (M * -N);
+  INT b = a / -N;
+  return b + M;
+}
+
+UINT NOINLINE
+opt_u3 (UINT x)
+{
+  if (x < (M << N) - GAP)
+    return 0;
+  UINT a = x - (M << N);
+  UINT b = a >> N;
+  return b + M;
+}
+
+UINT NOINLINE
+opt_u4 (UINT x)
+{
+  if (x > (UMAX - (M << N)) + GAP)
+    return 0;
+  UINT a = x + (M << N);
+  UINT b = a >> N;
+  return b - M;
+}
+
+INT NOINLINE
+opt_s9 (INT x)
+{
+  if (x < (M << N) - GAP)
+    return 0;
+  INT a = x - (M << N);
+  INT b = a >> N;
+  return b + M;
+}
+
+INT NOINLINE
+opt_s10 (INT x)
+{
+  if (x < IMIN + (M << N) - GAP || x > 0)
+    return 0;
+  INT a = x - (M << N);
+  INT b = a >> N;
+  return b + M;
+}
+
+INT NOINLINE
+opt_s11 (INT x)
+{
+  if (x > (-M << N) + GAP)
+    return 0;
+  INT a = x - (-M << N);
+  INT b = a >> N;
+  return b + -M;
+}
+
+INT NOINLINE
+opt_s12 (INT x)
+{
+  if (x > IMAX - (M << N) + GAP || x < 0)
+    return 0;
+  INT a = x - (-M << N);
+  INT b = a >> N;
+  return b + -M;
+}
+
+UINT NOINLINE
+opt_u5 (UINT x, UINT n, UINT m)
+{
+  if (n > N || m > M)
+    return 0;
+  if (x < (M*N) - GAP)
+    return 0;
+  UINT a = x - (m * n);
+  UINT b = a / n;
+  return b + m;
+}
+
+UINT NOINLINE
+opt_u6 (UINT x, UINT n, UINT m)
+{
+  if (n > N || m > M)
+    return 0;
+  if (x > (UMAX - M*N) + GAP)
+    return 0;
+  UINT a = x + (m * n);
+  UINT b = a / n;
+  return b - m;
+}
+
+INT NOINLINE
+opt_s13 (INT x, INT n, INT m)
+{
+  if (n > N || m > M || n < 0 || m < 0)
+    return 0;
+  if (x < (M*N) - GAP)
+    return 0;
+  INT a = x - (m * n);
+  INT b = a / n;
+  return b + m;
+}
+
+INT NOINLINE
+opt_s14 (INT x, INT n, INT m)
+{
+  if (n > N || m > M || n < 0 || m < 0)
+    return 0;
+  if (x > -M*N + GAP)
+    return 0;
+  INT a = x + (m * n);
+  INT b = a / n;
+  return b - m;
+}
+
+INT
+opt_s15 (INT x, INT n, INT m)
+{
+  if (n > 0 || m > 0 || n < -N || m < -M)
+    return 0;
+  if (x < (M*N) - GAP)
+    return 0;
+  INT a = x - (m * n);
+  INT b = a / n;
+  return b + m;
+}
+
+INT NOINLINE
+opt_s16 (INT x, INT n, INT m)
+{
+  if (n > 0 || m > 0 || n < -N || m < -M)
+    return 0;
+  if (x < 0 || x > (IMAX - M*N) + GAP)
+    return 0;
+  INT a = x + (m * n);
+  INT b = a / n;
+  return b - m;
+}
+