Message ID | 20100930134638.GG10599@tyan-ft48-01.lab.bos.redhat.com |
---|---|
State | New |
Headers | show |
On Thu, Sep 30, 2010 at 3:46 PM, Jakub Jelinek <jakub@redhat.com> wrote: > On Thu, Sep 30, 2010 at 11:21:19AM +0200, Richard Guenther wrote: >> Hm. There's still nothing on GIMPLE that performs this then, right? > > True, it will hit probably only or mainly during FE folding. > >> In fact I have a hard time following the logic of the code as it tries >> to handle the several cases without comments on what's going on ;) >> >> So, can you please add comments of the sort >> >> /* Now we know the expression is of the form (x & b) & c ... */ > > Like this? Yes. This patch is ok. Thanks, Richard. > 2010-09-30 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/31261 > * fold-const.c (fold_binary): Optimize ((A & N) + B) & M > for constants M and N, M == (1LL << cst) - 1 && (N & M) == M. > > * gcc.dg/tree-ssa/pr31261.c: New test. > > --- gcc/fold-const.c.jj 2010-09-16 11:05:27.000000000 +0200 > +++ gcc/fold-const.c 2010-09-30 15:41:43.787396620 +0200 > @@ -11071,6 +11071,133 @@ fold_binary_loc (location_t loc, > fold_convert_loc (loc, type, arg0)); > } > > + /* For constants M and N, if M == (1LL << cst) - 1 && (N & M) == M, > + ((A & N) + B) & M -> (A + B) & M > + Similarly if (N & M) == 0, > + ((A | N) + B) & M -> (A + B) & M > + and for - instead of + (or unary - instead of +) > + and/or ^ instead of |. > + If B is constant and (B & M) == 0, fold into A & M. */ > + if (host_integerp (arg1, 1)) > + { > + unsigned HOST_WIDE_INT cst1 = tree_low_cst (arg1, 1); > + if (~cst1 && (cst1 & (cst1 + 1)) == 0 > + && INTEGRAL_TYPE_P (TREE_TYPE (arg0)) > + && (TREE_CODE (arg0) == PLUS_EXPR > + || TREE_CODE (arg0) == MINUS_EXPR > + || TREE_CODE (arg0) == NEGATE_EXPR) > + && (TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0)) > + || TREE_CODE (TREE_TYPE (arg0)) == INTEGER_TYPE)) > + { > + tree pmop[2]; > + int which = 0; > + unsigned HOST_WIDE_INT cst0; > + > + /* Now we know that arg0 is (C + D) or (C - D) or > + -C and arg1 (M) is == (1LL << cst) - 1. > + Store C into PMOP[0] and D into PMOP[1]. */ > + pmop[0] = TREE_OPERAND (arg0, 0); > + pmop[1] = NULL; > + if (TREE_CODE (arg0) != NEGATE_EXPR) > + { > + pmop[1] = TREE_OPERAND (arg0, 1); > + which = 1; > + } > + > + if (!host_integerp (TYPE_MAX_VALUE (TREE_TYPE (arg0)), 1) > + || (tree_low_cst (TYPE_MAX_VALUE (TREE_TYPE (arg0)), 1) > + & cst1) != cst1) > + which = -1; > + > + for (; which >= 0; which--) > + switch (TREE_CODE (pmop[which])) > + { > + case BIT_AND_EXPR: > + case BIT_IOR_EXPR: > + case BIT_XOR_EXPR: > + if (TREE_CODE (TREE_OPERAND (pmop[which], 1)) > + != INTEGER_CST) > + break; > + /* tree_low_cst not used, because we don't care about > + the upper bits. */ > + cst0 = TREE_INT_CST_LOW (TREE_OPERAND (pmop[which], 1)); > + cst0 &= cst1; > + if (TREE_CODE (pmop[which]) == BIT_AND_EXPR) > + { > + if (cst0 != cst1) > + break; > + } > + else if (cst0 != 0) > + break; > + /* If C or D is of the form (A & N) where > + (N & M) == M, or of the form (A | N) or > + (A ^ N) where (N & M) == 0, replace it with A. */ > + pmop[which] = TREE_OPERAND (pmop[which], 0); > + break; > + case INTEGER_CST: > + /* If C or D is a N where (N & M) == 0, it can be > + omitted (assumed 0). */ > + if ((TREE_CODE (arg0) == PLUS_EXPR > + || (TREE_CODE (arg0) == MINUS_EXPR && which == 0)) > + && (TREE_INT_CST_LOW (pmop[which]) & cst1) == 0) > + pmop[which] = NULL; > + break; > + default: > + break; > + } > + > + /* Only build anything new if we optimized one or both arguments > + above. */ > + if (pmop[0] != TREE_OPERAND (arg0, 0) > + || (TREE_CODE (arg0) != NEGATE_EXPR > + && pmop[1] != TREE_OPERAND (arg0, 1))) > + { > + tree utype = type; > + if (! TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0))) > + { > + /* Perform the operations in a type that has defined > + overflow behavior. */ > + utype = unsigned_type_for (type); > + if (pmop[0] != NULL) > + pmop[0] = fold_convert_loc (loc, utype, pmop[0]); > + if (pmop[1] != NULL) > + pmop[1] = fold_convert_loc (loc, utype, pmop[1]); > + } > + > + if (TREE_CODE (arg0) == NEGATE_EXPR) > + tem = fold_build1_loc (loc, NEGATE_EXPR, utype, pmop[0]); > + else if (TREE_CODE (arg0) == PLUS_EXPR) > + { > + if (pmop[0] != NULL && pmop[1] != NULL) > + tem = fold_build2_loc (loc, PLUS_EXPR, utype, > + pmop[0], pmop[1]); > + else if (pmop[0] != NULL) > + tem = pmop[0]; > + else if (pmop[1] != NULL) > + tem = pmop[1]; > + else > + return build_int_cst (type, 0); > + } > + else if (pmop[0] == NULL) > + tem = fold_build1_loc (loc, NEGATE_EXPR, utype, pmop[1]); > + else > + tem = fold_build2_loc (loc, MINUS_EXPR, utype, > + pmop[0], pmop[1]); > + /* TEM is now the new binary +, - or unary - replacement. */ > + if (utype == type) > + return fold_build2_loc (loc, BIT_AND_EXPR, type, > + tem, arg1); > + else > + { > + tem = fold_build2_loc (loc, BIT_AND_EXPR, utype, tem, > + fold_convert_loc (loc, utype, > + arg1)); > + return fold_convert_loc (loc, type, tem); > + } > + } > + } > + } > + > t1 = distribute_bit_expr (loc, code, type, arg0, arg1); > if (t1 != NULL_TREE) > return t1; > --- gcc/testsuite/gcc.dg/tree-ssa/pr31261.c.jj 2010-09-29 09:48:36.374396699 +0200 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr31261.c 2010-09-29 10:12:49.612377621 +0200 > @@ -0,0 +1,40 @@ > +/* PR tree-optimization/31261 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-original" } */ > + > +unsigned int > +f1 (unsigned int a) > +{ > + return (8 - (a & 7)) & 7; > +} > + > +long int > +f2 (long int b) > +{ > + return (16 + (b & 7)) & 15; > +} > + > +char > +f3 (char c) > +{ > + return -(c & 63) & 31; > +} > + > +int > +f4 (int d) > +{ > + return (12 - (d & 15)) & 7; > +} > + > +int > +f5 (int e) > +{ > + return (12 - (e & 7)) & 15; > +} > + > +/* { dg-final { scan-tree-dump-times "return -a \& 7;" 1 "original" } } */ > +/* { dg-final { scan-tree-dump-times "return b \& 7;" 1 "original" } } */ > +/* { dg-final { scan-tree-dump-times "return \\(char\\) -\\(unsigned char\\) c \& 31;" 1 "original" } } */ > +/* { dg-final { scan-tree-dump-times "return \\(int\\) \\(12 - \\(unsigned int\\) d\\) \& 7;" 1 "original" } } */ > +/* { dg-final { scan-tree-dump-times "return 12 - \\(e \& 7\\) \& 15;" 1 "original" } } */ > +/* { dg-final { cleanup-tree-dump "original" } } */ > > Jakub >
On Thu, Sep 30, 2010 at 6:46 AM, Jakub Jelinek <jakub@redhat.com> wrote: > On Thu, Sep 30, 2010 at 11:21:19AM +0200, Richard Guenther wrote: >> Hm. There's still nothing on GIMPLE that performs this then, right? > > True, it will hit probably only or mainly during FE folding. > >> In fact I have a hard time following the logic of the code as it tries >> to handle the several cases without comments on what's going on ;) >> >> So, can you please add comments of the sort >> >> /* Now we know the expression is of the form (x & b) & c ... */ > > Like this? > > 2010-09-30 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/31261 > * fold-const.c (fold_binary): Optimize ((A & N) + B) & M > for constants M and N, M == (1LL << cst) - 1 && (N & M) == M. > > * gcc.dg/tree-ssa/pr31261.c: New test. > This caused: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45876 H.J.
--- gcc/fold-const.c.jj 2010-09-16 11:05:27.000000000 +0200 +++ gcc/fold-const.c 2010-09-30 15:41:43.787396620 +0200 @@ -11071,6 +11071,133 @@ fold_binary_loc (location_t loc, fold_convert_loc (loc, type, arg0)); } + /* For constants M and N, if M == (1LL << cst) - 1 && (N & M) == M, + ((A & N) + B) & M -> (A + B) & M + Similarly if (N & M) == 0, + ((A | N) + B) & M -> (A + B) & M + and for - instead of + (or unary - instead of +) + and/or ^ instead of |. + If B is constant and (B & M) == 0, fold into A & M. */ + if (host_integerp (arg1, 1)) + { + unsigned HOST_WIDE_INT cst1 = tree_low_cst (arg1, 1); + if (~cst1 && (cst1 & (cst1 + 1)) == 0 + && INTEGRAL_TYPE_P (TREE_TYPE (arg0)) + && (TREE_CODE (arg0) == PLUS_EXPR + || TREE_CODE (arg0) == MINUS_EXPR + || TREE_CODE (arg0) == NEGATE_EXPR) + && (TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0)) + || TREE_CODE (TREE_TYPE (arg0)) == INTEGER_TYPE)) + { + tree pmop[2]; + int which = 0; + unsigned HOST_WIDE_INT cst0; + + /* Now we know that arg0 is (C + D) or (C - D) or + -C and arg1 (M) is == (1LL << cst) - 1. + Store C into PMOP[0] and D into PMOP[1]. */ + pmop[0] = TREE_OPERAND (arg0, 0); + pmop[1] = NULL; + if (TREE_CODE (arg0) != NEGATE_EXPR) + { + pmop[1] = TREE_OPERAND (arg0, 1); + which = 1; + } + + if (!host_integerp (TYPE_MAX_VALUE (TREE_TYPE (arg0)), 1) + || (tree_low_cst (TYPE_MAX_VALUE (TREE_TYPE (arg0)), 1) + & cst1) != cst1) + which = -1; + + for (; which >= 0; which--) + switch (TREE_CODE (pmop[which])) + { + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + if (TREE_CODE (TREE_OPERAND (pmop[which], 1)) + != INTEGER_CST) + break; + /* tree_low_cst not used, because we don't care about + the upper bits. */ + cst0 = TREE_INT_CST_LOW (TREE_OPERAND (pmop[which], 1)); + cst0 &= cst1; + if (TREE_CODE (pmop[which]) == BIT_AND_EXPR) + { + if (cst0 != cst1) + break; + } + else if (cst0 != 0) + break; + /* If C or D is of the form (A & N) where + (N & M) == M, or of the form (A | N) or + (A ^ N) where (N & M) == 0, replace it with A. */ + pmop[which] = TREE_OPERAND (pmop[which], 0); + break; + case INTEGER_CST: + /* If C or D is a N where (N & M) == 0, it can be + omitted (assumed 0). */ + if ((TREE_CODE (arg0) == PLUS_EXPR + || (TREE_CODE (arg0) == MINUS_EXPR && which == 0)) + && (TREE_INT_CST_LOW (pmop[which]) & cst1) == 0) + pmop[which] = NULL; + break; + default: + break; + } + + /* Only build anything new if we optimized one or both arguments + above. */ + if (pmop[0] != TREE_OPERAND (arg0, 0) + || (TREE_CODE (arg0) != NEGATE_EXPR + && pmop[1] != TREE_OPERAND (arg0, 1))) + { + tree utype = type; + if (! TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0))) + { + /* Perform the operations in a type that has defined + overflow behavior. */ + utype = unsigned_type_for (type); + if (pmop[0] != NULL) + pmop[0] = fold_convert_loc (loc, utype, pmop[0]); + if (pmop[1] != NULL) + pmop[1] = fold_convert_loc (loc, utype, pmop[1]); + } + + if (TREE_CODE (arg0) == NEGATE_EXPR) + tem = fold_build1_loc (loc, NEGATE_EXPR, utype, pmop[0]); + else if (TREE_CODE (arg0) == PLUS_EXPR) + { + if (pmop[0] != NULL && pmop[1] != NULL) + tem = fold_build2_loc (loc, PLUS_EXPR, utype, + pmop[0], pmop[1]); + else if (pmop[0] != NULL) + tem = pmop[0]; + else if (pmop[1] != NULL) + tem = pmop[1]; + else + return build_int_cst (type, 0); + } + else if (pmop[0] == NULL) + tem = fold_build1_loc (loc, NEGATE_EXPR, utype, pmop[1]); + else + tem = fold_build2_loc (loc, MINUS_EXPR, utype, + pmop[0], pmop[1]); + /* TEM is now the new binary +, - or unary - replacement. */ + if (utype == type) + return fold_build2_loc (loc, BIT_AND_EXPR, type, + tem, arg1); + else + { + tem = fold_build2_loc (loc, BIT_AND_EXPR, utype, tem, + fold_convert_loc (loc, utype, + arg1)); + return fold_convert_loc (loc, type, tem); + } + } + } + } + t1 = distribute_bit_expr (loc, code, type, arg0, arg1); if (t1 != NULL_TREE) return t1; --- gcc/testsuite/gcc.dg/tree-ssa/pr31261.c.jj 2010-09-29 09:48:36.374396699 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/pr31261.c 2010-09-29 10:12:49.612377621 +0200 @@ -0,0 +1,40 @@ +/* PR tree-optimization/31261 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-original" } */ + +unsigned int +f1 (unsigned int a) +{ + return (8 - (a & 7)) & 7; +} + +long int +f2 (long int b) +{ + return (16 + (b & 7)) & 15; +} + +char +f3 (char c) +{ + return -(c & 63) & 31; +} + +int +f4 (int d) +{ + return (12 - (d & 15)) & 7; +} + +int +f5 (int e) +{ + return (12 - (e & 7)) & 15; +} + +/* { dg-final { scan-tree-dump-times "return -a \& 7;" 1 "original" } } */ +/* { dg-final { scan-tree-dump-times "return b \& 7;" 1 "original" } } */ +/* { dg-final { scan-tree-dump-times "return \\(char\\) -\\(unsigned char\\) c \& 31;" 1 "original" } } */ +/* { dg-final { scan-tree-dump-times "return \\(int\\) \\(12 - \\(unsigned int\\) d\\) \& 7;" 1 "original" } } */ +/* { dg-final { scan-tree-dump-times "return 12 - \\(e \& 7\\) \& 15;" 1 "original" } } */ +/* { dg-final { cleanup-tree-dump "original" } } */