Message ID | 333c6a5ad2f1c56613c4734a830fc5d8214061d9.1536144068.git.ams@codesourcery.com |
---|---|
State | New |
Headers | show |
Series | AMD GCN Port | expand |
<ams@codesourcery.com> writes: > This patch was part of the original patch we acquired from Honza and Martin. > > It simplifies vector elements that are inactive, according to the mask. > > 2018-09-05 Jan Hubicka <jh@suse.cz> > Martin Jambor <mjambor@suse.cz> > > * simplify-rtx.c (simplify_merge_mask): New function. > (simplify_ternary_operation): Use it, also see if VEC_MERGEs with the > same masks are used in op1 or op2. Would be good to have self-tests for the new transforms. > +/* X is an operand number OP of VEC_MERGE operation with MASK. "of a". Might also be worth mentioning that X can be a nested operation of a VEC_MERGE with a different mode, although it always has the same number of elements as MASK. > + Try to simplify using knowledge that values outside of MASK "simplify X" > + will not be used. */ > + > +rtx > +simplify_merge_mask (rtx x, rtx mask, int op) > +{ > + gcc_assert (VECTOR_MODE_P (GET_MODE (x))); > + poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x)); > + if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask)) > + { > + if (!side_effects_p (XEXP (x, 1 - op))) > + return XEXP (x, op); > + } > + if (side_effects_p (x)) > + return NULL_RTX; > + if (UNARY_P (x) > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) > + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)) known_eq, since we require equality for correctness. Same for the other tests. > + { > + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); > + if (top0) > + return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0, > + GET_MODE (XEXP (x, 0))); > + } > + if (BINARY_P (x) > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) > + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) > + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)) > + { > + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); > + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); > + if (top0 || top1) > + return simplify_gen_binary (GET_CODE (x), GET_MODE (x), > + top0 ? top0 : XEXP (x, 0), > + top1 ? top1 : XEXP (x, 1)); > + } > + if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) > + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) > + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits) > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 2))) > + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits)) > + { > + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); > + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); > + rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op); > + if (top0 || top1) > + return simplify_gen_ternary (GET_CODE (x), GET_MODE (x), > + GET_MODE (XEXP (x, 0)), > + top0 ? top0 : XEXP (x, 0), > + top1 ? top1 : XEXP (x, 1), > + top2 ? top2 : XEXP (x, 2)); > + } > + return NULL_RTX; > +} > + > > /* Simplify CODE, an operation with result mode MODE and three operands, > OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became > @@ -5967,6 +6026,28 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, > && !side_effects_p (op2) && !side_effects_p (op1)) > return op0; > > + if (!side_effects_p (op2)) > + { > + rtx top0 = simplify_merge_mask (op0, op2, 0); > + rtx top1 = simplify_merge_mask (op1, op2, 1); > + if (top0 || top1) > + return simplify_gen_ternary (code, mode, mode, > + top0 ? top0 : op0, > + top1 ? top1 : op1, op2); > + } > + > + if (GET_CODE (op0) == VEC_MERGE > + && rtx_equal_p (op2, XEXP (op0, 2)) > + && !side_effects_p (XEXP (op0, 1)) && !side_effects_p (op2)) > + return simplify_gen_ternary (code, mode, mode, > + XEXP (op0, 0), op1, op2); > + > + if (GET_CODE (op1) == VEC_MERGE > + && rtx_equal_p (op2, XEXP (op1, 2)) > + && !side_effects_p (XEXP (op0, 0)) && !side_effects_p (op2)) > + return simplify_gen_ternary (code, mode, mode, > + XEXP (op0, 1), op1, op2); Doesn't simplify_merge_mask make the second two redundant? I couldn't see the difference between them and the first condition tested by simplify_merge_mask. Thanks, Richard
On 17/09/18 10:05, Richard Sandiford wrote: > Would be good to have self-tests for the new transforms. [...] > known_eq, since we require equality for correctness. Same for the > other tests. How about the attached? I've made the edits you requested and written some self-tests. > Doesn't simplify_merge_mask make the second two redundant? I couldn't > see the difference between them and the first condition tested by > simplify_merge_mask. Yes, I think you're right. Removed, now. Andrew Simplify vec_merge according to the mask. This patch was part of the original patch we acquired from Honza and Martin. It simplifies nested vec_merge operations using the same mask. Self-tests are included. 2018-09-20 Andrew Stubbs <ams@codesourcery.com> Jan Hubicka <jh@suse.cz> Martin Jambor <mjambor@suse.cz> * simplify-rtx.c (simplify_merge_mask): New function. (simplify_ternary_operation): Use it, also see if VEC_MERGEs with the same masks are used in op1 or op2. (test_vec_merge): New function. (test_vector_ops): Call test_vec_merge. diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index f77e1aa..13b2882 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -5578,6 +5578,68 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val) return NULL_RTX; } +/* Try to simplify nested VEC_MERGE operations by comparing the masks. The + nested operations need not use the same vector mode, but must have the same + number of elements. + + X is an operand number OP of a VEC_MERGE operation with MASK. + Returns NULL_RTX if no simplification is possible. */ + +rtx +simplify_merge_mask (rtx x, rtx mask, int op) +{ + gcc_assert (VECTOR_MODE_P (GET_MODE (x))); + poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x)); + if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask)) + { + if (!side_effects_p (XEXP (x, 1 - op))) + return XEXP (x, op); + } + if (side_effects_p (x)) + return NULL_RTX; + if (UNARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + if (top0) + return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0, + GET_MODE (XEXP (x, 0))); + } + if (BINARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + if (top0 || top1) + return simplify_gen_binary (GET_CODE (x), GET_MODE (x), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1)); + } + if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 2))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op); + if (top0 || top1) + return simplify_gen_ternary (GET_CODE (x), GET_MODE (x), + GET_MODE (XEXP (x, 0)), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1), + top2 ? top2 : XEXP (x, 2)); + } + return NULL_RTX; +} + /* Simplify CODE, an operation with result mode MODE and three operands, OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became @@ -5967,6 +6029,16 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, && !side_effects_p (op2) && !side_effects_p (op1)) return op0; + if (!side_effects_p (op2)) + { + rtx top0 = simplify_merge_mask (op0, op2, 0); + rtx top1 = simplify_merge_mask (op1, op2, 1); + if (top0 || top1) + return simplify_gen_ternary (code, mode, mode, + top0 ? top0 : op0, + top1 ? top1 : op1, op2); + } + break; default: @@ -6932,6 +7004,71 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg) constm1_rtx)); } +/* Verify simplify_merge_mask works correctly. */ + +static void +test_vec_merge (machine_mode mode) +{ + rtx op0 = make_test_reg (mode); + rtx op1 = make_test_reg (mode); + rtx op2 = make_test_reg (mode); + rtx op3 = make_test_reg (mode); + rtx op4 = make_test_reg (mode); + rtx op5 = make_test_reg (mode); + rtx mask1 = make_test_reg (SImode); + rtx mask2 = make_test_reg (SImode); + rtx vm1 = gen_rtx_VEC_MERGE (mode, op0, op1, mask1); + rtx vm2 = gen_rtx_VEC_MERGE (mode, op2, op3, mask1); + rtx vm3 = gen_rtx_VEC_MERGE (mode, op4, op5, mask1); + + /* Simple vec_merge. */ + ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0)); + ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1)); + ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 0)); + ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 1)); + + /* Nested vec_merge. */ + rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1); + ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0)); + ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1)); + + /* Intermediate unary op. */ + rtx unop = gen_rtx_NOT (mode, vm1); + ASSERT_EQ (op0, XEXP (simplify_merge_mask (unop, mask1, 0), 0)); + ASSERT_EQ (op1, XEXP (simplify_merge_mask (unop, mask1, 1), 0)); + + /* Intermediate binary op. */ + rtx binop = gen_rtx_PLUS (mode, vm1, vm2); + rtx res = simplify_merge_mask (binop, mask1, 0); + ASSERT_EQ (op0, XEXP (res, 0)); + ASSERT_EQ (op2, XEXP (res, 1)); + res = simplify_merge_mask (binop, mask1, 1); + ASSERT_EQ (op1, XEXP (res, 0)); + ASSERT_EQ (op3, XEXP (res, 1)); + + /* Intermediate ternary op. */ + rtx tenop = gen_rtx_FMA (mode, vm1, vm2, vm3); + res = simplify_merge_mask (tenop, mask1, 0); + ASSERT_EQ (op0, XEXP (res, 0)); + ASSERT_EQ (op2, XEXP (res, 1)); + ASSERT_EQ (op4, XEXP (res, 2)); + res = simplify_merge_mask (tenop, mask1, 1); + ASSERT_EQ (op1, XEXP (res, 0)); + ASSERT_EQ (op3, XEXP (res, 1)); + ASSERT_EQ (op5, XEXP (res, 2)); + + /* Side effects. */ + rtx badop0 = gen_rtx_PRE_INC (mode, op0); + rtx badvm = gen_rtx_VEC_MERGE (mode, badop0, op1, mask1); + ASSERT_EQ (badop0, simplify_merge_mask (badvm, mask1, 0)); + ASSERT_EQ (NULL_RTX, simplify_merge_mask (badvm, mask1, 1)); + + /* Called indirectly. */ + res = simplify_rtx (nvm); + ASSERT_EQ (op0, XEXP (res, 0)); + ASSERT_EQ (op3, XEXP (res, 1)); +} + /* Verify some simplifications involving vectors. */ static void @@ -6947,6 +7084,7 @@ test_vector_ops () if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT && maybe_gt (GET_MODE_NUNITS (mode), 2)) test_vector_ops_series (mode, scalar_reg); + test_vec_merge (mode); } } }
Ping. On 20/09/18 16:26, Andrew Stubbs wrote: > On 17/09/18 10:05, Richard Sandiford wrote: >> Would be good to have self-tests for the new transforms. > [...] >> known_eq, since we require equality for correctness. Same for the >> other tests. > > How about the attached? I've made the edits you requested and written > some self-tests. > >> Doesn't simplify_merge_mask make the second two redundant? I couldn't >> see the difference between them and the first condition tested by >> simplify_merge_mask. > > Yes, I think you're right. Removed, now. > > Andrew >
Andrew Stubbs <ams@codesourcery.com> writes: > On 17/09/18 10:05, Richard Sandiford wrote: >> Would be good to have self-tests for the new transforms. > [...] >> known_eq, since we require equality for correctness. Same for the >> other tests. > > How about the attached? I've made the edits you requested and written > some self-tests. > >> Doesn't simplify_merge_mask make the second two redundant? I couldn't >> see the difference between them and the first condition tested by >> simplify_merge_mask. > > Yes, I think you're right. Removed, now. > > Andrew > > Simplify vec_merge according to the mask. > > This patch was part of the original patch we acquired from Honza and Martin. > > It simplifies nested vec_merge operations using the same mask. > > Self-tests are included. > > 2018-09-20 Andrew Stubbs <ams@codesourcery.com> > Jan Hubicka <jh@suse.cz> > Martin Jambor <mjambor@suse.cz> > > * simplify-rtx.c (simplify_merge_mask): New function. > (simplify_ternary_operation): Use it, also see if VEC_MERGEs with the > same masks are used in op1 or op2. > (test_vec_merge): New function. > (test_vector_ops): Call test_vec_merge. > > diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c > index f77e1aa..13b2882 100644 > --- a/gcc/simplify-rtx.c > +++ b/gcc/simplify-rtx.c > @@ -5578,6 +5578,68 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val) > return NULL_RTX; > } > > +/* Try to simplify nested VEC_MERGE operations by comparing the masks. The > + nested operations need not use the same vector mode, but must have the same > + number of elements. > + > + X is an operand number OP of a VEC_MERGE operation with MASK. > + Returns NULL_RTX if no simplification is possible. */ X isn't always operand OP, it can be nested within it. How about: /* Try to simplify X given that it appears within operand OP of a VEC_MERGE operation whose mask is MASK. X need not use the same vector mode as the VEC_MERGE, but it must have the same number of elements. Return the simplified X on success, otherwise return NULL_RTX. */ > + > +rtx > +simplify_merge_mask (rtx x, rtx mask, int op) > +{ > + gcc_assert (VECTOR_MODE_P (GET_MODE (x))); > + poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x)); > + if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask)) > + { > + if (!side_effects_p (XEXP (x, 1 - op))) > + return XEXP (x, op); > + } > + if (side_effects_p (x)) > + return NULL_RTX; > + if (UNARY_P (x) > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) > + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)) > + { > + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); > + if (top0) > + return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0, > + GET_MODE (XEXP (x, 0))); > + } > + if (BINARY_P (x) > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) > + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) > + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)) > + { > + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); > + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); > + if (top0 || top1) > + return simplify_gen_binary (GET_CODE (x), GET_MODE (x), > + top0 ? top0 : XEXP (x, 0), > + top1 ? top1 : XEXP (x, 1)); > + } > + if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) > + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) > + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits) > + && VECTOR_MODE_P (GET_MODE (XEXP (x, 2))) > + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits)) > + { > + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); > + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); > + rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op); > + if (top0 || top1) || top2? > + return simplify_gen_ternary (GET_CODE (x), GET_MODE (x), > + GET_MODE (XEXP (x, 0)), > + top0 ? top0 : XEXP (x, 0), > + top1 ? top1 : XEXP (x, 1), > + top2 ? top2 : XEXP (x, 2)); > + } > + return NULL_RTX; > +} > + > > /* Simplify CODE, an operation with result mode MODE and three operands, > OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became > @@ -5967,6 +6029,16 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, > && !side_effects_p (op2) && !side_effects_p (op1)) > return op0; > > + if (!side_effects_p (op2)) > + { > + rtx top0 = simplify_merge_mask (op0, op2, 0); > + rtx top1 = simplify_merge_mask (op1, op2, 1); > + if (top0 || top1) > + return simplify_gen_ternary (code, mode, mode, > + top0 ? top0 : op0, > + top1 ? top1 : op1, op2); > + } > + > break; > > default: > @@ -6932,6 +7004,71 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg) > constm1_rtx)); > } > > +/* Verify simplify_merge_mask works correctly. */ > + > +static void > +test_vec_merge (machine_mode mode) > +{ > + rtx op0 = make_test_reg (mode); > + rtx op1 = make_test_reg (mode); > + rtx op2 = make_test_reg (mode); > + rtx op3 = make_test_reg (mode); > + rtx op4 = make_test_reg (mode); > + rtx op5 = make_test_reg (mode); > + rtx mask1 = make_test_reg (SImode); > + rtx mask2 = make_test_reg (SImode); > + rtx vm1 = gen_rtx_VEC_MERGE (mode, op0, op1, mask1); > + rtx vm2 = gen_rtx_VEC_MERGE (mode, op2, op3, mask1); > + rtx vm3 = gen_rtx_VEC_MERGE (mode, op4, op5, mask1); > + > + /* Simple vec_merge. */ > + ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0)); > + ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1)); > + ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 0)); > + ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 1)); > + > + /* Nested vec_merge. */ > + rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1); > + ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0)); > + ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1)); Think the last two should simplify to op0 and op3, which I guess means recursing on the "return XEXP (x, op);" > + /* Intermediate unary op. */ > + rtx unop = gen_rtx_NOT (mode, vm1); > + ASSERT_EQ (op0, XEXP (simplify_merge_mask (unop, mask1, 0), 0)); > + ASSERT_EQ (op1, XEXP (simplify_merge_mask (unop, mask1, 1), 0)); > + > + /* Intermediate binary op. */ > + rtx binop = gen_rtx_PLUS (mode, vm1, vm2); > + rtx res = simplify_merge_mask (binop, mask1, 0); > + ASSERT_EQ (op0, XEXP (res, 0)); > + ASSERT_EQ (op2, XEXP (res, 1)); > + res = simplify_merge_mask (binop, mask1, 1); > + ASSERT_EQ (op1, XEXP (res, 0)); > + ASSERT_EQ (op3, XEXP (res, 1)); > + > + /* Intermediate ternary op. */ > + rtx tenop = gen_rtx_FMA (mode, vm1, vm2, vm3); > + res = simplify_merge_mask (tenop, mask1, 0); > + ASSERT_EQ (op0, XEXP (res, 0)); > + ASSERT_EQ (op2, XEXP (res, 1)); > + ASSERT_EQ (op4, XEXP (res, 2)); > + res = simplify_merge_mask (tenop, mask1, 1); > + ASSERT_EQ (op1, XEXP (res, 0)); > + ASSERT_EQ (op3, XEXP (res, 1)); > + ASSERT_EQ (op5, XEXP (res, 2)); > [...] > + /* Called indirectly. */ > + res = simplify_rtx (nvm); > + ASSERT_EQ (op0, XEXP (res, 0)); > + ASSERT_EQ (op3, XEXP (res, 1)); Would probably be better to ASSERT_RTX_EQ against the full simplified rtx, e.g. gen_rtx_NOT (mode, op0) Thanks, Richard
On 26/09/18 17:48, Richard Sandiford wrote: > Andrew Stubbs <ams@codesourcery.com> writes: >> + /* Nested vec_merge. */ >> + rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1); >> + ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0)); >> + ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1)); > > Think the last two should simplify to op0 and op3, which I guess > means recursing on the "return XEXP (x, op);" I thought about doing that, but I noticed that, for example, simplify_gen_unary does not recurse into its operand. Is that an omission, or is it expected that those operands will already have been simplified? Andrew
Andrew Stubbs <ams@codesourcery.com> writes: > On 26/09/18 17:48, Richard Sandiford wrote: >> Andrew Stubbs <ams@codesourcery.com> writes: >>> + /* Nested vec_merge. */ >>> + rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1); >>> + ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0)); >>> + ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1)); >> >> Think the last two should simplify to op0 and op3, which I guess >> means recursing on the "return XEXP (x, op);" > > I thought about doing that, but I noticed that, for example, > simplify_gen_unary does not recurse into its operand. Is that an > omission, or is it expected that those operands will already have been > simplified? Ah, yeah, each operand should already fully be simplified. But then the only thing we testing here compared to: /* Simple vec_merge. */ ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0)); ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1)); is that we *don't* recurse. It would be worth adding a comment to say that, since if we both thought about it, I guess whoever comes next will too. And the assumption that existing VEC_MERGEs are fully simplified means we should return null: if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask)) { if (!side_effects_p (XEXP (x, 1 - op))) return XEXP (x, op); --->here } On keeping the complexity down: if (side_effects_p (x)) return NULL_RTX; makes this quadratic for chains of unary operations. Is it really needed? The code after it simply recurses on operands and doesn't discard anything itself, so it looks like the VEC_MERGE call to side_effects_p would be enough. Richard
On 27/09/18 08:16, Richard Sandiford wrote: > On keeping the complexity down: > > if (side_effects_p (x)) > return NULL_RTX; > > makes this quadratic for chains of unary operations. Is it really > needed? The code after it simply recurses on operands and doesn't > discard anything itself, so it looks like the VEC_MERGE call to > side_effects_p would be enough. The two calls do not check the same thing. The other one checks the other operand of a vec_merge, and this checks the current operand. I suppose it's safe to discard a VEC_MERGE when the chosen operand contains side effects, but I'm not so sure when the VEC_MERGE itself is an operand to an operator with side effects. I'm having a hard time inventing a scenario in which a PRE_INC could contain a VEC_MERGE, but maybe a volatile MEM or ASM_OPERANDS could do? Conversely, I don't see that side-effects deep down in an expression should stop us transforming it as a high level. Is there an equivalent to side_effects_p that doesn't recurse? Should there be? Andrew
Andrew Stubbs <ams@codesourcery.com> writes: > On 27/09/18 08:16, Richard Sandiford wrote: >> On keeping the complexity down: >> >> if (side_effects_p (x)) >> return NULL_RTX; >> >> makes this quadratic for chains of unary operations. Is it really >> needed? The code after it simply recurses on operands and doesn't >> discard anything itself, so it looks like the VEC_MERGE call to >> side_effects_p would be enough. > > The two calls do not check the same thing. The other one checks the > other operand of a vec_merge, and this checks the current operand. > > I suppose it's safe to discard a VEC_MERGE when the chosen operand > contains side effects, but I'm not so sure when the VEC_MERGE itself is > an operand to an operator with side effects. I'm having a hard time > inventing a scenario in which a PRE_INC could contain a VEC_MERGE, but > maybe a volatile MEM or ASM_OPERANDS could do? But we wouldn't recurse for PRE_INC, MEM or ASM_OPERANDS, since they have the wrong rtx class. AFAICT no current unary, binary or ternary operator has that level of side-effect (and that's a good thing). We also don't guarantee to preserve FP exceptions as side-effects. > Conversely, I don't see that side-effects deep down in an expression > should stop us transforming it as a high level. > > Is there an equivalent to side_effects_p that doesn't recurse? Should > there be? Not aware of an existing function, and it might be useful to have one at some point. Just not sure we need it for this. Richard
On 27/09/18 17:19, Richard Sandiford wrote: > But we wouldn't recurse for PRE_INC, MEM or ASM_OPERANDS, since they > have the wrong rtx class. AFAICT no current unary, binary or ternary > operator has that level of side-effect (and that's a good thing). OK, in that case I'll remove it and we can cross that bridge if we come to it. This patch should also address your other concerns. OK? Andrew Simplify vec_merge according to the mask. This patch was part of the original patch we acquired from Honza and Martin. It simplifies nested vec_merge operations using the same mask. Self-tests are included. 2018-09-27 Andrew Stubbs <ams@codesourcery.com> Jan Hubicka <jh@suse.cz> Martin Jambor <mjambor@suse.cz> * simplify-rtx.c (simplify_merge_mask): New function. (simplify_ternary_operation): Use it, also see if VEC_MERGEs with the same masks are used in op1 or op2. (test_vec_merge): New function. (test_vector_ops): Call test_vec_merge. diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index b4c6883..9bc5386 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -5578,6 +5578,68 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val) return NULL_RTX; } +/* Try to simplify X given that it appears within operand OP of a + VEC_MERGE operation whose mask is MASK. X need not use the same + vector mode as the VEC_MERGE, but it must have the same number of + elements. + + Return the simplified X on success, otherwise return NULL_RTX. */ + +rtx +simplify_merge_mask (rtx x, rtx mask, int op) +{ + gcc_assert (VECTOR_MODE_P (GET_MODE (x))); + poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x)); + if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask)) + { + if (side_effects_p (XEXP (x, 1 - op))) + return NULL_RTX; + + return XEXP (x, op); + } + if (UNARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + if (top0) + return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0, + GET_MODE (XEXP (x, 0))); + } + if (BINARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + if (top0 || top1) + return simplify_gen_binary (GET_CODE (x), GET_MODE (x), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1)); + } + if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 2))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op); + if (top0 || top1 || top2) + return simplify_gen_ternary (GET_CODE (x), GET_MODE (x), + GET_MODE (XEXP (x, 0)), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1), + top2 ? top2 : XEXP (x, 2)); + } + return NULL_RTX; +} + /* Simplify CODE, an operation with result mode MODE and three operands, OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became @@ -5967,6 +6029,16 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, && !side_effects_p (op2) && !side_effects_p (op1)) return op0; + if (!side_effects_p (op2)) + { + rtx top0 = simplify_merge_mask (op0, op2, 0); + rtx top1 = simplify_merge_mask (op1, op2, 1); + if (top0 || top1) + return simplify_gen_ternary (code, mode, mode, + top0 ? top0 : op0, + top1 ? top1 : op1, op2); + } + break; default: @@ -6856,6 +6928,69 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg) constm1_rtx)); } +/* Verify simplify_merge_mask works correctly. */ + +static void +test_vec_merge (machine_mode mode) +{ + rtx op0 = make_test_reg (mode); + rtx op1 = make_test_reg (mode); + rtx op2 = make_test_reg (mode); + rtx op3 = make_test_reg (mode); + rtx op4 = make_test_reg (mode); + rtx op5 = make_test_reg (mode); + rtx mask1 = make_test_reg (SImode); + rtx mask2 = make_test_reg (SImode); + rtx vm1 = gen_rtx_VEC_MERGE (mode, op0, op1, mask1); + rtx vm2 = gen_rtx_VEC_MERGE (mode, op2, op3, mask1); + rtx vm3 = gen_rtx_VEC_MERGE (mode, op4, op5, mask1); + + /* Simple vec_merge. */ + ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0)); + ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1)); + ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 0)); + ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 1)); + + /* Nested vec_merge. + It's tempting to make this simplify right down to opN, but we don't + because all the simplify_* functions assume that the operands have + already been simplified. */ + rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1); + ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0)); + ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1)); + + /* Intermediate unary op. */ + rtx unop = gen_rtx_NOT (mode, vm1); + ASSERT_RTX_EQ (gen_rtx_NOT (mode, op0), + simplify_merge_mask (unop, mask1, 0)); + ASSERT_RTX_EQ (gen_rtx_NOT (mode, op1), + simplify_merge_mask (unop, mask1, 1)); + + /* Intermediate binary op. */ + rtx binop = gen_rtx_PLUS (mode, vm1, vm2); + ASSERT_RTX_EQ (gen_rtx_PLUS (mode, op0, op2), + simplify_merge_mask (binop, mask1, 0)); + ASSERT_RTX_EQ (gen_rtx_PLUS (mode, op1, op3), + simplify_merge_mask (binop, mask1, 1)); + + /* Intermediate ternary op. */ + rtx tenop = gen_rtx_FMA (mode, vm1, vm2, vm3); + ASSERT_RTX_EQ (gen_rtx_FMA (mode, op0, op2, op4), + simplify_merge_mask (tenop, mask1, 0)); + ASSERT_RTX_EQ (gen_rtx_FMA (mode, op1, op3, op5), + simplify_merge_mask (tenop, mask1, 1)); + + /* Side effects. */ + rtx badop0 = gen_rtx_PRE_INC (mode, op0); + rtx badvm = gen_rtx_VEC_MERGE (mode, badop0, op1, mask1); + ASSERT_EQ (badop0, simplify_merge_mask (badvm, mask1, 0)); + ASSERT_EQ (NULL_RTX, simplify_merge_mask (badvm, mask1, 1)); + + /* Called indirectly. */ + ASSERT_RTX_EQ (gen_rtx_VEC_MERGE (mode, op0, op3, mask1), + simplify_rtx (nvm)); +} + /* Verify some simplifications involving vectors. */ static void @@ -6871,6 +7006,7 @@ test_vector_ops () if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT && maybe_gt (GET_MODE_NUNITS (mode), 2)) test_vector_ops_series (mode, scalar_reg); + test_vec_merge (mode); } } }
Andrew Stubbs <ams@codesourcery.com> writes: > On 27/09/18 17:19, Richard Sandiford wrote: >> But we wouldn't recurse for PRE_INC, MEM or ASM_OPERANDS, since they >> have the wrong rtx class. AFAICT no current unary, binary or ternary >> operator has that level of side-effect (and that's a good thing). > > OK, in that case I'll remove it and we can cross that bridge if we come > to it. > > This patch should also address your other concerns. > > OK? Yes, thanks. Richard
On 28/09/18 09:11, Richard Sandiford wrote:
> Yes, thanks.
Committed.
Thanks for all the reviews. :-)
Andrew
On Fri, Sep 28, 2018 at 6:33 AM Andrew Stubbs <ams@codesourcery.com> wrote: > > On 28/09/18 09:11, Richard Sandiford wrote: > > Yes, thanks. > > Committed. > > Thanks for all the reviews. :-) > This caused: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89445
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 89487f2..6f27bda 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -5578,6 +5578,65 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val) return NULL_RTX; } +/* X is an operand number OP of VEC_MERGE operation with MASK. + Try to simplify using knowledge that values outside of MASK + will not be used. */ + +rtx +simplify_merge_mask (rtx x, rtx mask, int op) +{ + gcc_assert (VECTOR_MODE_P (GET_MODE (x))); + poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x)); + if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask)) + { + if (!side_effects_p (XEXP (x, 1 - op))) + return XEXP (x, op); + } + if (side_effects_p (x)) + return NULL_RTX; + if (UNARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + if (top0) + return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0, + GET_MODE (XEXP (x, 0))); + } + if (BINARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + if (top0 || top1) + return simplify_gen_binary (GET_CODE (x), GET_MODE (x), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1)); + } + if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 2))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op); + if (top0 || top1) + return simplify_gen_ternary (GET_CODE (x), GET_MODE (x), + GET_MODE (XEXP (x, 0)), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1), + top2 ? top2 : XEXP (x, 2)); + } + return NULL_RTX; +} + /* Simplify CODE, an operation with result mode MODE and three operands, OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became @@ -5967,6 +6026,28 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, && !side_effects_p (op2) && !side_effects_p (op1)) return op0; + if (!side_effects_p (op2)) + { + rtx top0 = simplify_merge_mask (op0, op2, 0); + rtx top1 = simplify_merge_mask (op1, op2, 1); + if (top0 || top1) + return simplify_gen_ternary (code, mode, mode, + top0 ? top0 : op0, + top1 ? top1 : op1, op2); + } + + if (GET_CODE (op0) == VEC_MERGE + && rtx_equal_p (op2, XEXP (op0, 2)) + && !side_effects_p (XEXP (op0, 1)) && !side_effects_p (op2)) + return simplify_gen_ternary (code, mode, mode, + XEXP (op0, 0), op1, op2); + + if (GET_CODE (op1) == VEC_MERGE + && rtx_equal_p (op2, XEXP (op1, 2)) + && !side_effects_p (XEXP (op0, 0)) && !side_effects_p (op2)) + return simplify_gen_ternary (code, mode, mode, + XEXP (op0, 1), op1, op2); + break; default: