Message ID | 55CC8F62.4080604@arm.com |
---|---|
State | New |
Headers | show |
On Thu, Aug 13, 2015 at 01:36:50PM +0100, Kyrill Tkachov wrote: Some comments below. > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 1394ed7..c8bd8d2 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -6652,6 +6652,25 @@ cost_plus: > return true; > > case MOD: > + /* We can expand signed mod by power of 2 using a > + NEGS, two parallel ANDs and a CSNEG. Assume here > + that CSNEG is COSTS_N_INSNS (1). This case should Why do we want to hardcode this assumption rather than parameterise? Even if you model this as the cost of an unconditional NEG I think that is better than hardcoding zero cost. > + only ever be reached through the set_smod_pow2_cheap check > + in expmed.c. */ > + if (CONST_INT_P (XEXP (x, 1)) > + && exact_log2 (INTVAL (XEXP (x, 1))) > 0 > + && (mode == SImode || mode == DImode)) > + { > + *cost += COSTS_N_INSNS (3); Can you add am comment to make it clear why this is not off-by-one? By quick inspection it looks like you have made a typo trying to set the cost to be 3 instructions rather than 4 - a reader needs the extra knowledge that we already have a COSTS_N_INSNS(1) as a baseline. This would be clearer as: /* We will expand to four instructions, reset the baseline. */ *cost = COSTS_N_INSNS (4); > + > + if (speed) > + *cost += 2 * extra_cost->alu.logical > + + extra_cost->alu.arith; > + > + return true; > + } > + > + /* Fall-through. */ > case UMOD: > if (speed) > { > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index b7b04c4..a515573 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -302,6 +302,62 @@ (define_expand "cmp<mode>" > } > ) > > +;; AArch64-specific expansion of signed mod by power of 2 using CSNEG. Seems a strange comment given that we are in aarch64.md :-). > +;; For x0 % n where n is a power of 2 produce: > +;; negs x1, x0 > +;; and x0, x0, #(n - 1) > +;; and x1, x1, #(n - 1) > +;; csneg x0, x0, x1, mi > + > +(define_expand "mod<mode>3" > + [(match_operand:GPI 0 "register_operand" "") > + (match_operand:GPI 1 "register_operand" "") > + (match_operand:GPI 2 "const_int_operand" "")] > + "" > + { > + HOST_WIDE_INT val = INTVAL (operands[2]); > + > + if (val <= 0 > + || exact_log2 (INTVAL (operands[2])) <= 0 > + || !aarch64_bitmask_imm (INTVAL (operands[2]) - 1, <MODE>mode)) > + FAIL; > + > + rtx mask = GEN_INT (val - 1); > + > + /* In the special case of x0 % 2 we can do the even shorter: > + cmp x0, xzr > + and x0, x0, 1 > + csneg x0, x0, x0, ge. */ > + if (val == 2) > + { > + rtx masked = gen_reg_rtx (<MODE>mode); > + rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx); Non-obvious why this is correct given the comment above saying we want GE. > + emit_insn (gen_and<mode>3 (masked, operands[1], mask)); > + rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx); > + emit_insn (gen_csneg3<mode>_insn (operands[0], x, masked, masked)); > + DONE; > + } > + > + rtx neg_op = gen_reg_rtx (<MODE>mode); > + rtx_insn *insn = emit_insn (gen_neg<mode>2_compare0 (neg_op, operands[1])); > + > + /* Extract the condition register and mode. */ > + rtx cmp = XVECEXP (PATTERN (insn), 0, 0); > + rtx cc_reg = SET_DEST (cmp); > + rtx cond = gen_rtx_GE (VOIDmode, cc_reg, const0_rtx); > + > + rtx masked_pos = gen_reg_rtx (<MODE>mode); > + emit_insn (gen_and<mode>3 (masked_pos, operands[1], mask)); > + > + rtx masked_neg = gen_reg_rtx (<MODE>mode); > + emit_insn (gen_and<mode>3 (masked_neg, neg_op, mask)); > + > + emit_insn (gen_csneg3<mode>_insn (operands[0], cond, > + masked_neg, masked_pos)); > + DONE; > + } > +) > + Thanks, James
Hi James, On 01/09/15 10:25, James Greenhalgh wrote: > On Thu, Aug 13, 2015 at 01:36:50PM +0100, Kyrill Tkachov wrote: > > Some comments below. Thanks, I'll incorporate them, with one clarification inline. > >> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >> index 1394ed7..c8bd8d2 100644 >> --- a/gcc/config/aarch64/aarch64.c >> +++ b/gcc/config/aarch64/aarch64.c >> @@ -6652,6 +6652,25 @@ cost_plus: >> return true; >> >> case MOD: >> + /* We can expand signed mod by power of 2 using a >> + NEGS, two parallel ANDs and a CSNEG. Assume here >> + that CSNEG is COSTS_N_INSNS (1). This case should > Why do we want to hardcode this assumption rather than parameterise? Even > if you model this as the cost of an unconditional NEG I think that is > better than hardcoding zero cost. > > >> + only ever be reached through the set_smod_pow2_cheap check >> + in expmed.c. */ >> + if (CONST_INT_P (XEXP (x, 1)) >> + && exact_log2 (INTVAL (XEXP (x, 1))) > 0 >> + && (mode == SImode || mode == DImode)) >> + { >> + *cost += COSTS_N_INSNS (3); > Can you add am comment to make it clear why this is not off-by-one? By > quick inspection it looks like you have made a typo trying to set the > cost to be 3 instructions rather than 4 - a reader needs the extra > knowledge that we already have a COSTS_N_INSNS(1) as a baseline. > > This would be clearer as: > > /* We will expand to four instructions, reset the baseline. */ > *cost = COSTS_N_INSNS (4); > >> + >> + if (speed) >> + *cost += 2 * extra_cost->alu.logical >> + + extra_cost->alu.arith; >> + >> + return true; >> + } >> + >> + /* Fall-through. */ >> case UMOD: >> if (speed) >> { >> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md >> index b7b04c4..a515573 100644 >> --- a/gcc/config/aarch64/aarch64.md >> +++ b/gcc/config/aarch64/aarch64.md >> @@ -302,6 +302,62 @@ (define_expand "cmp<mode>" >> } >> ) >> >> +;; AArch64-specific expansion of signed mod by power of 2 using CSNEG. > Seems a strange comment given that we are in aarch64.md :-). > >> +;; For x0 % n where n is a power of 2 produce: >> +;; negs x1, x0 >> +;; and x0, x0, #(n - 1) >> +;; and x1, x1, #(n - 1) >> +;; csneg x0, x0, x1, mi >> + >> +(define_expand "mod<mode>3" >> + [(match_operand:GPI 0 "register_operand" "") >> + (match_operand:GPI 1 "register_operand" "") >> + (match_operand:GPI 2 "const_int_operand" "")] >> + "" >> + { >> + HOST_WIDE_INT val = INTVAL (operands[2]); >> + >> + if (val <= 0 >> + || exact_log2 (INTVAL (operands[2])) <= 0 >> + || !aarch64_bitmask_imm (INTVAL (operands[2]) - 1, <MODE>mode)) >> + FAIL; >> + >> + rtx mask = GEN_INT (val - 1); >> + >> + /* In the special case of x0 % 2 we can do the even shorter: >> + cmp x0, xzr >> + and x0, x0, 1 >> + csneg x0, x0, x0, ge. */ >> + if (val == 2) >> + { >> + rtx masked = gen_reg_rtx (<MODE>mode); >> + rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx); > Non-obvious why this is correct given the comment above saying we want GE. We want to negate if the comparison earlier yielded "less than zero". Unfortunately, the CSNEG form of that is written as csneg x0, x0, x0, ge which looks counter-intuitive at first glance. With my other patch posted at https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00020.html the generated assembly would be: cneg x0, x0, lt which more closely matches the RTL generation in this hunk. If you think that patch should go in, I'll rewrite the CSNEG form in this comment to CNEG. Kyrill > >> + emit_insn (gen_and<mode>3 (masked, operands[1], mask)); >> + rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx); >> + emit_insn (gen_csneg3<mode>_insn (operands[0], x, masked, masked)); >> + DONE; >> + } >> + >> + rtx neg_op = gen_reg_rtx (<MODE>mode); >> + rtx_insn *insn = emit_insn (gen_neg<mode>2_compare0 (neg_op, operands[1])); >> + >> + /* Extract the condition register and mode. */ >> + rtx cmp = XVECEXP (PATTERN (insn), 0, 0); >> + rtx cc_reg = SET_DEST (cmp); >> + rtx cond = gen_rtx_GE (VOIDmode, cc_reg, const0_rtx); >> + >> + rtx masked_pos = gen_reg_rtx (<MODE>mode); >> + emit_insn (gen_and<mode>3 (masked_pos, operands[1], mask)); >> + >> + rtx masked_neg = gen_reg_rtx (<MODE>mode); >> + emit_insn (gen_and<mode>3 (masked_neg, neg_op, mask)); >> + >> + emit_insn (gen_csneg3<mode>_insn (operands[0], cond, >> + masked_neg, masked_pos)); >> + DONE; >> + } >> +) >> + > Thanks, > James >
commit de67e5fba716ce835f595c4167f57ec4faf61607 Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> Date: Wed Jul 15 17:01:13 2015 +0100 [AArch64][1/3] Expand signed mod by power of 2 using CSNEG diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 1394ed7..c8bd8d2 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -6652,6 +6652,25 @@ cost_plus: return true; case MOD: + /* We can expand signed mod by power of 2 using a + NEGS, two parallel ANDs and a CSNEG. Assume here + that CSNEG is COSTS_N_INSNS (1). This case should + only ever be reached through the set_smod_pow2_cheap check + in expmed.c. */ + if (CONST_INT_P (XEXP (x, 1)) + && exact_log2 (INTVAL (XEXP (x, 1))) > 0 + && (mode == SImode || mode == DImode)) + { + *cost += COSTS_N_INSNS (3); + + if (speed) + *cost += 2 * extra_cost->alu.logical + + extra_cost->alu.arith; + + return true; + } + + /* Fall-through. */ case UMOD: if (speed) { diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index b7b04c4..a515573 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -302,6 +302,62 @@ (define_expand "cmp<mode>" } ) +;; AArch64-specific expansion of signed mod by power of 2 using CSNEG. +;; For x0 % n where n is a power of 2 produce: +;; negs x1, x0 +;; and x0, x0, #(n - 1) +;; and x1, x1, #(n - 1) +;; csneg x0, x0, x1, mi + +(define_expand "mod<mode>3" + [(match_operand:GPI 0 "register_operand" "") + (match_operand:GPI 1 "register_operand" "") + (match_operand:GPI 2 "const_int_operand" "")] + "" + { + HOST_WIDE_INT val = INTVAL (operands[2]); + + if (val <= 0 + || exact_log2 (INTVAL (operands[2])) <= 0 + || !aarch64_bitmask_imm (INTVAL (operands[2]) - 1, <MODE>mode)) + FAIL; + + rtx mask = GEN_INT (val - 1); + + /* In the special case of x0 % 2 we can do the even shorter: + cmp x0, xzr + and x0, x0, 1 + csneg x0, x0, x0, ge. */ + if (val == 2) + { + rtx masked = gen_reg_rtx (<MODE>mode); + rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx); + emit_insn (gen_and<mode>3 (masked, operands[1], mask)); + rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx); + emit_insn (gen_csneg3<mode>_insn (operands[0], x, masked, masked)); + DONE; + } + + rtx neg_op = gen_reg_rtx (<MODE>mode); + rtx_insn *insn = emit_insn (gen_neg<mode>2_compare0 (neg_op, operands[1])); + + /* Extract the condition register and mode. */ + rtx cmp = XVECEXP (PATTERN (insn), 0, 0); + rtx cc_reg = SET_DEST (cmp); + rtx cond = gen_rtx_GE (VOIDmode, cc_reg, const0_rtx); + + rtx masked_pos = gen_reg_rtx (<MODE>mode); + emit_insn (gen_and<mode>3 (masked_pos, operands[1], mask)); + + rtx masked_neg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_and<mode>3 (masked_neg, neg_op, mask)); + + emit_insn (gen_csneg3<mode>_insn (operands[0], cond, + masked_neg, masked_pos)); + DONE; + } +) + (define_insn "*condjump" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand 1 "cc_register" "") (const_int 0)]) @@ -2392,7 +2448,7 @@ (define_insn "*ngcsi_uxtw" [(set_attr "type" "adc_reg")] ) -(define_insn "*neg<mode>2_compare0" +(define_insn "neg<mode>2_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r")) (const_int 0)))