| Message ID | BANLkTi=X2npY51SP3-b_OoV5rTWmg_ZvwA@mail.gmail.com |
|---|---|
| State | New |
| Headers | show |
On 04/20/2011 08:22 AM, Kai Tietz wrote: > + if (TREE_CODE (arg0) == BIT_AND_EXPR > + && TREE_CODE (arg1) == BIT_AND_EXPR) > + { > + tree a0, a1, l0, l1, n0, n1; > + > + a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0)); > + a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1)); > + > + l0 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0)); > + l1 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1)); > + > + n0 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l0); > + n1 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l1); > + > + if ((operand_equal_p (n0, a0, 0) > + && operand_equal_p (n1, a1, 0)) > + || (operand_equal_p (n0, a1, 0) > + && operand_equal_p (n1, a0, 0))) > + return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, l0, n1); First, you typoed BIT_XOR_EXPR in this first block. Second, I don't see how you're arbitrarily choosing L0 and N1 in the expansion. If you write the expression the other way around, (~x & y) | (x & ~y) don't you wind up with (~x ^ ~y) ? Or do the extra NOT expressions get folded away anyway? > + if (TREE_CODE (arg0) == TREE_CODE (arg1) > + && (TREE_CODE (arg1) == TRUTH_AND_EXPR > + || TREE_CODE (arg1) == TRUTH_ANDIF_EXPR)) I don't believe you want to apply this transformation with ANDIF. r~
On Wed, Apr 20, 2011 at 05:22:31PM +0200, Kai Tietz wrote: > --- gcc.orig/gcc/fold-const.c 2011-04-20 17:10:39.478091900 +0200 > +++ gcc/gcc/fold-const.c 2011-04-20 17:11:22.901039400 +0200 > @@ -10660,6 +10660,28 @@ fold_binary_loc (location_t loc, > && reorder_operands_p (arg0, TREE_OPERAND (arg1, 0))) > return omit_one_operand_loc (loc, type, arg0, TREE_OPERAND (arg1, 0)); > > + /* (X & ~Y) | (~X & Y) is X ^ Y */ > + if (TREE_CODE (arg0) == BIT_AND_EXPR > + && TREE_CODE (arg1) == BIT_AND_EXPR) > + { > + tree a0, a1, l0, l1, n0, n1; > + > + a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0)); > + a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1)); > + > + l0 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0)); > + l1 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1)); > + > + n0 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l0); > + n1 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l1); > + > + if ((operand_equal_p (n0, a0, 0) > + && operand_equal_p (n1, a1, 0)) > + || (operand_equal_p (n0, a1, 0) > + && operand_equal_p (n1, a0, 0))) > + return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, l0, n1); > + } > + I must say I don't like first folding/building new trees, then testing and then maybe optimizing, that is slow and creates unnecessary garbage in the likely case the optimization can't do anything. 
Wouldn't something like: int arg0_not = TREE_CODE (TREE_OPERAND (arg0, 1)) == BIT_NOT_EXPR; int arg1_not = TREE_CODE (TREE_OPERAND (arg1, 1)) == BIT_NOT_EXPR; if (TREE_CODE (TREE_OPERAND (arg0, arg0_not)) == BIT_NOT_EXPR && TREE_CODE (TREE_OPERAND (arg1, arg1_not)) == BIT_NOT_EXPR && operand_equal_p (TREE_OPERAND (TREE_OPERAND (arg0, arg0_not), 0), TREE_OPERAND (arg1, 1 - arg1_not), 0) && operand_equal_p (TREE_OPERAND (TREE_OPERAND (arg1, arg1_not), 0), TREE_OPERAND (arg0, 1 - arg0_not), 0)) return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1 - arg0_not)), fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1 - arg1_not))); work better? Jakub
2011/4/20 Jakub Jelinek <jakub@redhat.com>: > On Wed, Apr 20, 2011 at 05:22:31PM +0200, Kai Tietz wrote: >> --- gcc.orig/gcc/fold-const.c 2011-04-20 17:10:39.478091900 +0200 >> +++ gcc/gcc/fold-const.c 2011-04-20 17:11:22.901039400 +0200 >> @@ -10660,6 +10660,28 @@ fold_binary_loc (location_t loc, >> && reorder_operands_p (arg0, TREE_OPERAND (arg1, 0))) >> return omit_one_operand_loc (loc, type, arg0, TREE_OPERAND (arg1, 0)); >> >> + /* (X & ~Y) | (~X & Y) is X ^ Y */ >> + if (TREE_CODE (arg0) == BIT_AND_EXPR >> + && TREE_CODE (arg1) == BIT_AND_EXPR) >> + { >> + tree a0, a1, l0, l1, n0, n1; >> + >> + a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0)); >> + a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1)); >> + >> + l0 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0)); >> + l1 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1)); >> + >> + n0 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l0); >> + n1 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l1); >> + >> + if ((operand_equal_p (n0, a0, 0) >> + && operand_equal_p (n1, a1, 0)) >> + || (operand_equal_p (n0, a1, 0) >> + && operand_equal_p (n1, a0, 0))) >> + return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, l0, n1); >> + } >> + > > I must say I don't like first folding/building new trees, then testing > and then maybe optimizing, that is slow and creates unnecessary garbage > in the likely case the optimization can't do anything. 
> > Wouldn't something like: > int arg0_not = TREE_CODE (TREE_OPERAND (arg0, 1)) == BIT_NOT_EXPR; > int arg1_not = TREE_CODE (TREE_OPERAND (arg1, 1)) == BIT_NOT_EXPR; > if (TREE_CODE (TREE_OPERAND (arg0, arg0_not)) == BIT_NOT_EXPR > && TREE_CODE (TREE_OPERAND (arg1, arg1_not)) == BIT_NOT_EXPR > && operand_equal_p (TREE_OPERAND (TREE_OPERAND (arg0, arg0_not), 0), > TREE_OPERAND (arg1, 1 - arg1_not), 0) > && operand_equal_p (TREE_OPERAND (TREE_OPERAND (arg1, arg1_not), 0), > TREE_OPERAND (arg0, 1 - arg0_not), 0)) > return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, > fold_convert_loc (loc, type, > TREE_OPERAND (arg0, 1 - arg0_not)), > fold_convert_loc (loc, type, > TREE_OPERAND (arg1, 1 - arg1_not))); > work better? > > Jakub > Well, as special case we could use that, but we have here also to handle integer-values, so I used fold to make sure I get inverse. Also there might be some transformations, which otherwise might be not caught, like !(X || Y) == !X && !Y ... Regards, Kai
On Wed, Apr 20, 2011 at 5:58 PM, Kai Tietz <ktietz70@googlemail.com> wrote: > 2011/4/20 Jakub Jelinek <jakub@redhat.com>: >> On Wed, Apr 20, 2011 at 05:22:31PM +0200, Kai Tietz wrote: >>> --- gcc.orig/gcc/fold-const.c 2011-04-20 17:10:39.478091900 +0200 >>> +++ gcc/gcc/fold-const.c 2011-04-20 17:11:22.901039400 +0200 >>> @@ -10660,6 +10660,28 @@ fold_binary_loc (location_t loc, >>> && reorder_operands_p (arg0, TREE_OPERAND (arg1, 0))) >>> return omit_one_operand_loc (loc, type, arg0, TREE_OPERAND (arg1, 0)); >>> >>> + /* (X & ~Y) | (~X & Y) is X ^ Y */ >>> + if (TREE_CODE (arg0) == BIT_AND_EXPR >>> + && TREE_CODE (arg1) == BIT_AND_EXPR) >>> + { >>> + tree a0, a1, l0, l1, n0, n1; >>> + >>> + a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0)); >>> + a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1)); >>> + >>> + l0 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0)); >>> + l1 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1)); >>> + >>> + n0 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l0); >>> + n1 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l1); >>> + >>> + if ((operand_equal_p (n0, a0, 0) >>> + && operand_equal_p (n1, a1, 0)) >>> + || (operand_equal_p (n0, a1, 0) >>> + && operand_equal_p (n1, a0, 0))) >>> + return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, l0, n1); >>> + } >>> + >> >> I must say I don't like first folding/building new trees, then testing >> and then maybe optimizing, that is slow and creates unnecessary garbage >> in the likely case the optimization can't do anything. 
>> >> Wouldn't something like: >> int arg0_not = TREE_CODE (TREE_OPERAND (arg0, 1)) == BIT_NOT_EXPR; >> int arg1_not = TREE_CODE (TREE_OPERAND (arg1, 1)) == BIT_NOT_EXPR; >> if (TREE_CODE (TREE_OPERAND (arg0, arg0_not)) == BIT_NOT_EXPR >> && TREE_CODE (TREE_OPERAND (arg1, arg1_not)) == BIT_NOT_EXPR >> && operand_equal_p (TREE_OPERAND (TREE_OPERAND (arg0, arg0_not), 0), >> TREE_OPERAND (arg1, 1 - arg1_not), 0) >> && operand_equal_p (TREE_OPERAND (TREE_OPERAND (arg1, arg1_not), 0), >> TREE_OPERAND (arg0, 1 - arg0_not), 0)) >> return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, >> fold_convert_loc (loc, type, >> TREE_OPERAND (arg0, 1 - arg0_not)), >> fold_convert_loc (loc, type, >> TREE_OPERAND (arg1, 1 - arg1_not))); >> work better? >> >> Jakub >> > > Well, as special case we could use that, but we have here also to > handle integer-values, so I used fold to make sure I get inverse. Also > there might be some transformations, which otherwise might be not > caught, like !(X || Y) == !X && !Y ... Btw, I agree with Jakub. Fold is suppose to not create any garbage if a folding does not apply. So I don't like your patch either. Richard. > Regards, > Kai > > > -- > | (\_/) This is Bunny. Copy and paste > | (='.'=) Bunny into your signature to help > | (")_(") him gain world domination >
Index: gcc/gcc/fold-const.c =================================================================== --- gcc.orig/gcc/fold-const.c 2011-04-20 17:10:39.478091900 +0200 +++ gcc/gcc/fold-const.c 2011-04-20 17:11:22.901039400 +0200 @@ -10660,6 +10660,28 @@ fold_binary_loc (location_t loc, && reorder_operands_p (arg0, TREE_OPERAND (arg1, 0))) return omit_one_operand_loc (loc, type, arg0, TREE_OPERAND (arg1, 0)); + /* (X & ~Y) | (~X & Y) is X ^ Y */ + if (TREE_CODE (arg0) == BIT_AND_EXPR + && TREE_CODE (arg1) == BIT_AND_EXPR) + { + tree a0, a1, l0, l1, n0, n1; + + a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0)); + a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1)); + + l0 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0)); + l1 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1)); + + n0 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l0); + n1 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l1); + + if ((operand_equal_p (n0, a0, 0) + && operand_equal_p (n1, a1, 0)) + || (operand_equal_p (n0, a1, 0) + && operand_equal_p (n1, a0, 0))) + return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, l0, n1); + } + t1 = distribute_bit_expr (loc, code, type, arg0, arg1); if (t1 != NULL_TREE) return t1; @@ -12039,6 +12061,28 @@ fold_binary_loc (location_t loc, && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0)) return omit_one_operand_loc (loc, type, integer_one_node, arg0); + /* (X && !Y) || (!X && Y) is X ^ Y */ + if (TREE_CODE (arg0) == TREE_CODE (arg1) + && (TREE_CODE (arg1) == TRUTH_AND_EXPR + || TREE_CODE (arg1) == TRUTH_ANDIF_EXPR)) + { + tree a0, a1, l0, l1, n0, n1; + + a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0)); + a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1)); + + l0 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0)); + l1 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1)); + + n0 = fold_build1_loc (loc, TRUTH_NOT_EXPR, type, l0); + n1 = fold_build1_loc (loc, TRUTH_NOT_EXPR, type, l1); + + if ((operand_equal_p (n0, a0, 0) + && 
operand_equal_p (n1, a1, 0)) + || (operand_equal_p (n0, a1, 0) + && operand_equal_p (n1, a0, 0))) + return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, l0, n1); + } goto truth_andor; case TRUTH_XOR_EXPR: Index: gcc/gcc/testsuite/gcc.dg/binop-xor1.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ gcc/gcc/testsuite/gcc.dg/binop-xor1.c 2011-04-20 17:11:22.905039900 +0200 @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int +foo (int a, int b, int c) +{ + return ((a && !b && c) || (!a && b && c)); +} + +/* We expect to see "<bb N>"; confirm that, so that we know to count + it in the real test. */ +/* { dg-final { scan-tree-dump-times "<bb\[^>\]*>" 5 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\^" 1 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: gcc/gcc/testsuite/gcc.dg/binop-xor2.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ gcc/gcc/testsuite/gcc.dg/binop-xor2.c 2011-04-20 17:11:22.908540300 +0200 @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int +foo (int a, int b) +{ + return ((a & ~b) | (~a & b)); +} + +/* We expect to see "<bb N>"; confirm that, so that we know to count + it in the real test. 
*/ +/* { dg-final { scan-tree-dump-times "<bb\[^>\]*>" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\^" 1 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: gcc/gcc/testsuite/gcc.dg/binop-xor3.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ gcc/gcc/testsuite/gcc.dg/binop-xor3.c 2011-04-20 17:11:22.911040600 +0200 @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int +foo (int a, int b) +{ + return ((a && !b) || (!a && b)); +} + +/* We expect to see "<bb N>"; confirm that, so that we know to count + it in the real test. */ +/* { dg-final { scan-tree-dump-times "<bb\[^>\]*>" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\^" 1 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: gcc/gcc/testsuite/gcc.dg/binop-xor4.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ gcc/gcc/testsuite/gcc.dg/binop-xor4.c 2011-04-20 17:11:22.913541000 +0200 @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int +foo (int a, int b, int c) +{ + return ((a & ~b) | (~a & b)) & c; +} + +/* We expect to see "<bb N>"; confirm that, so that we know to count + it in the real test. 
*/ +/* { dg-final { scan-tree-dump-times "<bb\[^>\]*>" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\^" 1 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: gcc/gcc/testsuite/gcc.dg/binop-xor5.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ gcc/gcc/testsuite/gcc.dg/binop-xor5.c 2011-04-20 17:11:22.916541300 +0200 @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int +foo (int a, int b, int c) +{ + return ((a & ~b & c) | (~a & b & c)); +} + +/* We expect to see "<bb N>"; confirm that, so that we know to count + it in the real test. */ +/* { dg-final { scan-tree-dump-times "<bb\[^>\]*>" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\^" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\&" 1 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */