@@ -8996,6 +8996,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
}
(cmp @0 { res; })))))))))
+/* Fold C1 - X CMP C2 into X RCMP C1 - C2, RCMP being CMP with sides swapped.  */
+
+(for cmp (lt le gt ge eq ne)
+     rcmp (gt ge lt le eq ne)
+  (simplify
+   (cmp (minus INTEGER_CST@0 @1) INTEGER_CST@2)
+/* Only valid for UB-on-overflow types, where C1 - X cannot wrap.  When
+   C1 - C2 itself overflows, X can never equal it, and it lies below the
+   type's minimum if C1 < 0 (above its maximum otherwise).  */
+   (if (!TREE_OVERFLOW (@0) && !TREE_OVERFLOW (@2)
+        && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1)))
+     (with { tree res = int_const_binop (MINUS_EXPR, @0, @2); }
+      (if (TREE_OVERFLOW (res))
+        (switch
+         (if (cmp == NE_EXPR)
+          { constant_boolean_node (true, type); })
+         (if (cmp == EQ_EXPR)
+          { constant_boolean_node (false, type); })
+         {
+           bool less = cmp == LE_EXPR || cmp == LT_EXPR;
+           bool ovf_low = wi::lt_p (wi::to_wide (@0), 0,
+                                    TYPE_SIGN (TREE_TYPE (@0)));
+           constant_boolean_node (less == ovf_low, type);
+         })
+        (rcmp @1 { res; }))))))
+
/* Canonicalizations of BIT_FIELD_REFs. */
(simplify
@@ -57,5 +57,5 @@ T (25, unsigned short, 2U - x, if (r > 2U) foo (0))
T (26, unsigned char, 2U - x, if (r <= 2U) foo (0))
/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 16 "widening_mul" { target { i?86-*-* x86_64-*-* } } } } */
-/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 11 "widening_mul" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */
-/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 9 "widening_mul" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 9 "widening_mul" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 7 "widening_mul" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
new file mode 100644
@@ -0,0 +1,66 @@
+/* PR tree-optimization/116024 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-forwprop1-details" } */
+
+#include <stdint.h>
+#include <limits.h>
+
+uint32_t f(void);
+
+int32_t i1(void)
+{
+ int32_t l = 10 - (int32_t)f();
+ return l <= 9; // 10 - f() <= 9  <=>  f() >= 1, i.e. f() > 0
+}
+
+int32_t i1a(void)
+{
+ int32_t l = 20 - (int32_t)f();
+ return l <= INT32_MIN; // needs f() >= 20 - INT32_MIN > INT32_MAX: return 0
+}
+
+int32_t i1b(void)
+{
+ int32_t l = 30 - (int32_t)f();
+ return l <= INT32_MIN + 31; // f() >= 30 - (INT32_MIN + 31) = INT32_MAX, i.e. f() == INT32_MAX
+}
+
+int32_t i1c(void)
+{
+ int32_t l = INT32_MAX - 40 - (int32_t)f();
+ return l <= -38; // f() >= INT32_MAX - 2, i.e. f() > INT32_MAX - 3
+}
+
+int32_t i1d(void)
+{
+ int32_t l = INT32_MAX - 50 - (int32_t)f();
+ return l <= INT32_MAX - 1; // same as l != INT32_MAX, i.e. f() != -50
+}
+
+int32_t i1e(void)
+{
+ int32_t l = INT32_MAX - 60 - (int32_t)f();
+ return l != INT32_MAX - 90; // f() != (INT32_MAX - 60) - (INT32_MAX - 90), i.e. f() != 30
+}
+
+int32_t i1f(void)
+{
+ int32_t l = INT32_MIN + 70 - (int32_t)f();
+ return l <= INT32_MAX - 2; // C1 - C2 underflows below INT32_MIN, so f() >= C1 - C2 always holds: return 1
+}
+
+int32_t i1g(void)
+{
+ int32_t l = INT32_MAX/2 + 30 - (int32_t)f();
+ return l <= INT32_MIN/2 - 30; // C1 - C2 overflows above INT32_MAX, so f() >= C1 - C2 never holds: return 0
+}
+
+
+/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*?- _" 5 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "return 0" 2 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "return 1" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 0" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to.* == 2147483647" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 2147483644" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to.* != 4294967246" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to.* != 30" 1 "forwprop1" } } */
Implement a match.pd pattern for C1 - X cmp C2, where C1 and C2 are integer constants and X is of a UB-on-overflow type. The pattern is simplified to X rcmp C1 - C2 by moving X and C2 to the other side of the comparison (with opposite signs). If C1 - C2 happens to overflow, replace the whole expression with either a constant 0 or a constant 1 node, depending on the comparison operator and the sign of the overflow. This transformation occasionally saves load-immediate / subtraction instructions, e.g. the following statement: 10 - (int) x <= 9; now compiles to sgt a0,a0,zero instead of li a5,10; sub a0,a5,a0; slti a0,a0,10 on 32-bit RISC-V. Additional examples can be found in the newly added test file. This patch has been bootstrapped and regtested on aarch64, x86_64, and i386, and additionally regtested on riscv32. Existing tests were adjusted where necessary. gcc/ChangeLog: PR tree-optimization/116024 * match.pd: New transformation around integer comparison. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/pr116024.c: New test. * gcc.dg/pr67089-6.c: Adjust. Signed-off-by: Artemiy Volkov <artemiy@synopsys.com> --- gcc/match.pd | 26 ++++++++++ gcc/testsuite/gcc.dg/pr67089-6.c | 4 +- gcc/testsuite/gcc.dg/tree-ssa/pr116024.c | 66 ++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024.c