Message ID | ZVZa8GkHxwl3OcxS@arm.com |
---|---|
State | New |
Headers | show |
Series | aarch64: Rework ldp/stp patterns, add new ldp/stp pass | expand |
Alex Coplan <alex.coplan@arm.com> writes: > Thus far the writeback forms of ldp/stp have been exclusively used in > prologue and epilogue code for saving/restoring of registers to/from the > stack. > > As such, forms of ldp/stp that weren't needed for prologue/epilogue code > weren't supported by the aarch64 backend. This patch generalizes the > load/store pair writeback patterns to allow: > > - Base registers other than the stack pointer. > - Modes that weren't previously supported. > - Combinations of distinct modes provided they have the same size. > - Pre/post variants that weren't previously needed in prologue/epilogue > code. > > We make quite some effort to avoid a combinatorial explosion in the > number of patterns generated (and those in the source) by making > extensive use of special predicates. > > An updated version of the upcoming ldp/stp pass can generate the > writeback forms, so this patch is motivated by that. > > This patch doesn't add zero-extending or sign-extending forms of the > writeback patterns; that is left for future work. > > Bootstrapped/regtested as a series on aarch64-linux-gnu, OK for trunk? > > gcc/ChangeLog: > > * config/aarch64/aarch64-protos.h (aarch64_ldpstp_operand_mode_p): Declare. > * config/aarch64/aarch64.cc (aarch64_gen_storewb_pair): Build RTL > directly instead of invoking named pattern. > (aarch64_gen_loadwb_pair): Likewise. > (aarch64_ldpstp_operand_mode_p): New. > * config/aarch64/aarch64.md (loadwb_pair<GPI:mode>_<P:mode>): Replace with > ... > (*loadwb_post_pair_<ldst_sz>): ... this. Generalize as described > in cover letter. > (loadwb_pair<GPF:mode>_<P:mode>): Delete (superseded by the > above). > (*loadwb_post_pair_16): New. > (*loadwb_pre_pair_<ldst_sz>): New. > (loadwb_pair<TX:mode>_<P:mode>): Delete. > (*loadwb_pre_pair_16): New. > (storewb_pair<GPI:mode>_<P:mode>): Replace with ... > (*storewb_pre_pair_<ldst_sz>): ... this. Generalize as > described in cover letter. > (*storewb_pre_pair_16): New. 
> (storewb_pair<GPF:mode>_<P:mode>): Delete. > (*storewb_post_pair_<ldst_sz>): New. > (storewb_pair<TX:mode>_<P:mode>): Delete. > (*storewb_post_pair_16): New. > * config/aarch64/predicates.md (aarch64_mem_pair_operator): New. > (pmode_plus_operator): New. > (aarch64_ldp_reg_operand): New. > (aarch64_stp_reg_operand): New. > --- > gcc/config/aarch64/aarch64-protos.h | 1 + > gcc/config/aarch64/aarch64.cc | 60 +++--- > gcc/config/aarch64/aarch64.md | 284 ++++++++++++++++++++-------- > gcc/config/aarch64/predicates.md | 38 ++++ > 4 files changed, 271 insertions(+), 112 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h > index 36d6c688bc8..e463fd5c817 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -1023,6 +1023,7 @@ bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode); > bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode); > bool aarch64_mem_ok_with_ldpstp_policy_model (rtx, bool, machine_mode); > void aarch64_swap_ldrstr_operands (rtx *, bool); > +bool aarch64_ldpstp_operand_mode_p (machine_mode); > > extern void aarch64_asm_output_pool_epilogue (FILE *, const char *, > tree, HOST_WIDE_INT); > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 4820fac67a1..ccf081d2a16 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -8977,23 +8977,15 @@ static rtx > aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2, > HOST_WIDE_INT adjustment) > { > - switch (mode) > - { > - case E_DImode: > - return gen_storewb_pairdi_di (base, base, reg, reg2, > - GEN_INT (-adjustment), > - GEN_INT (UNITS_PER_WORD - adjustment)); > - case E_DFmode: > - return gen_storewb_pairdf_di (base, base, reg, reg2, > - GEN_INT (-adjustment), > - GEN_INT (UNITS_PER_WORD - adjustment)); > - case E_TFmode: > - return gen_storewb_pairtf_di (base, base, reg, reg2, > - GEN_INT 
(-adjustment), > - GEN_INT (UNITS_PER_VREG - adjustment)); > - default: > - gcc_unreachable (); > - } > + rtx new_base = plus_constant (Pmode, base, -adjustment); > + rtx mem = gen_frame_mem (mode, new_base); > + rtx mem2 = adjust_address_nv (mem, mode, GET_MODE_SIZE (mode)); > + > + return gen_rtx_PARALLEL (VOIDmode, > + gen_rtvec (3, > + gen_rtx_SET (base, new_base), > + gen_rtx_SET (mem, reg), > + gen_rtx_SET (mem2, reg2))); > } > > /* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the > @@ -9025,20 +9017,15 @@ static rtx > aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2, > HOST_WIDE_INT adjustment) > { > - switch (mode) > - { > - case E_DImode: > - return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment), > - GEN_INT (UNITS_PER_WORD)); > - case E_DFmode: > - return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment), > - GEN_INT (UNITS_PER_WORD)); > - case E_TFmode: > - return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment), > - GEN_INT (UNITS_PER_VREG)); > - default: > - gcc_unreachable (); > - } > + rtx mem = gen_frame_mem (mode, base); > + rtx mem2 = adjust_address_nv (mem, mode, GET_MODE_SIZE (mode)); > + rtx new_base = plus_constant (Pmode, base, adjustment); > + > + return gen_rtx_PARALLEL (VOIDmode, > + gen_rtvec (3, > + gen_rtx_SET (base, new_base), > + gen_rtx_SET (reg, mem), > + gen_rtx_SET (reg2, mem2))); > } > > /* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it > @@ -26688,6 +26675,17 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed) > return false; > } > > +bool > +aarch64_ldpstp_operand_mode_p (machine_mode mode)

The function should have a comment. I realise it's kind of obvious from the name, but still. I suppose at least one thing to clarify is that the mode is for one register, rather than the pair as a whole.

> +{ > + if (!targetm.hard_regno_mode_ok (V0_REGNUM, mode) > + || hard_regno_nregs (V0_REGNUM, mode) > 1) > + return false; > + > + const auto size = GET_MODE_SIZE (mode); > + return known_eq (size, 4) || known_eq (size, 8) || known_eq (size, 16); > +} > + > /* Return true if MEM1 and MEM2 can be combined into a single access > of mode MODE, with the combined access having the same address as MEM1. */ > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index 7be1de38b1c..c92a51690c5 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -1831,102 +1831,224 @@ (define_insn "store_pair_dw_<TX:mode><TX2:mode>" > (set_attr "fp" "yes")] > ) > > +;; Writeback load/store pair patterns. > +;; > +;; Note that modes in the patterns [SI DI TI] are used only as a proxy for their > +;; size; aarch64_ldp_reg_operand and aarch64_mem_pair_operator are special > +;; predicates which accept a wide range of operand modes, with the requirement > +;; that the contextual (pattern) mode is of the same size as the operand mode. > + > ;; Load pair with post-index writeback. This is primarily used in function > ;; epilogues. > -(define_insn "loadwb_pair<GPI:mode>_<P:mode>" > +(define_insn "*loadwb_post_pair_<ldst_sz>" > [(parallel

Pre-existing, but the outer parallel is redundant. Insn patterns are inherently parallel; only define_expand patterns are not.

> - [(set (match_operand:P 0 "register_operand" "=k") > - (plus:P (match_operand:P 1 "register_operand" "0") > - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) > - (set (match_operand:GPI 2 "register_operand" "=r") > - (mem:GPI (match_dup 1))) > - (set (match_operand:GPI 3 "register_operand" "=r") > - (mem:GPI (plus:P (match_dup 1) > - (match_operand:P 5 "const_int_operand" "n"))))])] > - "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)" > - "ldp\\t%<GPI:w>2, %<GPI:w>3, [%1], %4" > - [(set_attr "type" "load_<GPI:ldpstp_sz>")] > -) > - > -(define_insn "loadwb_pair<GPF:mode>_<P:mode>" > + [(set (match_operand 0 "pmode_register_operand") > + (match_operator 7 "pmode_plus_operator" [ > + (match_operand 1 "pmode_register_operand") > + (match_operand 4 "const_int_operand")])) > + (set (match_operand:GPI 2 "aarch64_ldp_reg_operand") > + (match_operator 5 "memory_operand" [(match_dup 1)])) > + (set (match_operand:GPI 3 "aarch64_ldp_reg_operand") > + (match_operator 6 "memory_operand" [ > + (match_operator 8 "pmode_plus_operator" [ > + (match_dup 1) > + (const_int <ldst_sz>)])]))])] > + "aarch64_mem_pair_offset (operands[4], <MODE>mode) > + && !reg_overlap_mentioned_p (operands[0], operands[2]) > + && !reg_overlap_mentioned_p (operands[0], operands[3])"

In principle, the last two conditions shouldn't be needed, since the requirement holds from generic RTL rules. Same for the other load patterns.

> + {@ [cons: =0, 1, =2, =3; attrs: type] > + [ rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%1], %4 > + [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%1], %4

I think we should just use spaces for indentation after the "[". Only the leading whitespace "needs" to be tabbed. That'd make diffs easier to read.

> + } > +) > + > +;; q-register variant of the above > +(define_insn "*loadwb_post_pair_16" > + [(parallel > + [(set (match_operand 0 "pmode_register_operand" "=rk") > + (match_operator 7 "pmode_plus_operator" [ > + (match_operand 1 "pmode_register_operand" "0") > + (match_operand 4 "const_int_operand")])) > + (set (match_operand:TI 2 "aarch64_ldp_reg_operand" "=w") > + (match_operator 5 "memory_operand" [(match_dup 1)])) > + (set (match_operand:TI 3 "aarch64_ldp_reg_operand" "=w") > + (match_operator 6 "memory_operand" > + [(match_operator 8 "pmode_plus_operator" [ > + (match_dup 1) > + (const_int 16)])]))])] > + "TARGET_FLOAT > + && aarch64_mem_pair_offset (operands[4], TImode) > + && !reg_overlap_mentioned_p (operands[0], operands[2]) > + && !reg_overlap_mentioned_p (operands[0], operands[3])" > + "ldp\t%q2, %q3, [%1], %4" > + [(set_attr "type" "neon_ldp_q")] > +) > + > +;; Load pair with pre-index writeback. > +(define_insn "*loadwb_pre_pair_<ldst_sz>" > [(parallel > - [(set (match_operand:P 0 "register_operand" "=k") > - (plus:P (match_operand:P 1 "register_operand" "0") > - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) > - (set (match_operand:GPF 2 "register_operand" "=w") > - (mem:GPF (match_dup 1))) > - (set (match_operand:GPF 3 "register_operand" "=w") > - (mem:GPF (plus:P (match_dup 1) > - (match_operand:P 5 "const_int_operand" "n"))))])] > - "INTVAL (operands[5]) == GET_MODE_SIZE (<GPF:MODE>mode)" > - "ldp\\t%<GPF:w>2, %<GPF:w>3, [%1], %4" > - [(set_attr "type" "neon_load1_2reg")] > -) > - > -(define_insn "loadwb_pair<TX:mode>_<P:mode>" > + [(set (match_operand 0 "pmode_register_operand") > + (match_operator 8 "pmode_plus_operator" [ > + (match_operand 1 "pmode_register_operand") > + (match_operand 4 "const_int_operand")])) > + (set (match_operand:GPI 2 "aarch64_ldp_reg_operand") > + (match_operator 6 "memory_operand" [ > + (match_operator 10 "pmode_plus_operator" [ > + (match_dup 1) > + (match_dup 4) > + ])])) > + (set (match_operand:GPI 3 
"aarch64_ldp_reg_operand") > + (match_operator 7 "memory_operand" [ > + (match_operator 9 "pmode_plus_operator" [ > + (match_dup 1) > + (match_operand 5 "const_int_operand") > + ])]))])] Very minor, but the operand numbering looks a bit inconsistent. Think it would be more natural to use 9 for the middle arm and 10 for the final one. Same for later patterns. OK with those changes, thanks. Richard > + "aarch64_mem_pair_offset (operands[4], <MODE>mode) > + && known_eq (INTVAL (operands[5]), > + INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode)) > + && !reg_overlap_mentioned_p (operands[0], operands[2]) > + && !reg_overlap_mentioned_p (operands[0], operands[3])" > + {@ [cons: =&0, 1, =2, =3; attrs: type ] > + [ rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%0, %4]! > + [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%0, %4]! > + } > +) > + > +;; q-register variant of the above > +(define_insn "*loadwb_pre_pair_16" > [(parallel > - [(set (match_operand:P 0 "register_operand" "=k") > - (plus:P (match_operand:P 1 "register_operand" "0") > - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) > - (set (match_operand:TX 2 "register_operand" "=w") > - (mem:TX (match_dup 1))) > - (set (match_operand:TX 3 "register_operand" "=w") > - (mem:TX (plus:P (match_dup 1) > - (match_operand:P 5 "const_int_operand" "n"))))])] > - "TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)" > - "ldp\\t%q2, %q3, [%1], %4" > + [(set (match_operand 0 "pmode_register_operand" "=&rk") > + (match_operator 8 "pmode_plus_operator" [ > + (match_operand 1 "pmode_register_operand" "0") > + (match_operand 4 "const_int_operand")])) > + (set (match_operand:TI 2 "aarch64_ldp_reg_operand" "=w") > + (match_operator 6 "memory_operand" [ > + (match_operator 10 "pmode_plus_operator" [ > + (match_dup 1) > + (match_dup 4) > + ])])) > + (set (match_operand:TI 3 "aarch64_ldp_reg_operand" "=w") > + (match_operator 7 "memory_operand" [ > + (match_operator 9 "pmode_plus_operator" [ > + 
(match_dup 1) > + (match_operand 5 "const_int_operand") > + ])]))])] > + "TARGET_FLOAT > + && aarch64_mem_pair_offset (operands[4], TImode) > + && known_eq (INTVAL (operands[5]), INTVAL (operands[4]) + 16) > + && !reg_overlap_mentioned_p (operands[0], operands[2]) > + && !reg_overlap_mentioned_p (operands[0], operands[3])" > + "ldp\t%q2, %q3, [%0, %4]!" > [(set_attr "type" "neon_ldp_q")] > ) > > ;; Store pair with pre-index writeback. This is primarily used in function > ;; prologues. > -(define_insn "storewb_pair<GPI:mode>_<P:mode>" > +(define_insn "*storewb_pre_pair_<ldst_sz>" > [(parallel > - [(set (match_operand:P 0 "register_operand" "=&k") > - (plus:P (match_operand:P 1 "register_operand" "0") > - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) > - (set (mem:GPI (plus:P (match_dup 0) > - (match_dup 4))) > - (match_operand:GPI 2 "register_operand" "r")) > - (set (mem:GPI (plus:P (match_dup 0) > - (match_operand:P 5 "const_int_operand" "n"))) > - (match_operand:GPI 3 "register_operand" "r"))])] > - "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)" > - "stp\\t%<GPI:w>2, %<GPI:w>3, [%0, %4]!" 
> - [(set_attr "type" "store_<GPI:ldpstp_sz>")] > + [(set (match_operand 0 "pmode_register_operand") > + (match_operator 6 "pmode_plus_operator" [ > + (match_operand 1 "pmode_register_operand") > + (match_operand 4 "const_int_operand") > + ])) > + (set (match_operator:GPI 7 "aarch64_mem_pair_operator" [ > + (match_operator 8 "pmode_plus_operator" [ > + (match_dup 0) > + (match_dup 4) > + ])]) > + (match_operand:GPI 2 "aarch64_stp_reg_operand")) > + (set (match_operator:GPI 9 "aarch64_mem_pair_operator" [ > + (match_operator 10 "pmode_plus_operator" [ > + (match_dup 0) > + (match_operand 5 "const_int_operand") > + ])]) > + (match_operand:GPI 3 "aarch64_stp_reg_operand"))])] > + "aarch64_mem_pair_offset (operands[4], <MODE>mode) > + && known_eq (INTVAL (operands[5]), > + INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode)) > + && !reg_overlap_mentioned_p (operands[0], operands[2]) > + && !reg_overlap_mentioned_p (operands[0], operands[3])" > + {@ [cons: =&0, 1, 2, 3; attrs: type ] > + [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0, %4]! > + [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0, %4]! > + } > +) > + > +;; q-register variant of the above. 
> +(define_insn "*storewb_pre_pair_16" > + [(parallel > + [(set (match_operand 0 "pmode_register_operand" "=&rk") > + (match_operator 6 "pmode_plus_operator" [ > + (match_operand 1 "pmode_register_operand" "0") > + (match_operand 4 "const_int_operand") > + ])) > + (set (match_operator:TI 7 "aarch64_mem_pair_operator" [ > + (match_operator 8 "pmode_plus_operator" [ > + (match_dup 0) > + (match_dup 4) > + ])]) > + (match_operand:TI 2 "aarch64_ldp_reg_operand" "w")) > + (set (match_operator:TI 9 "aarch64_mem_pair_operator" [ > + (match_operator 10 "pmode_plus_operator" [ > + (match_dup 0) > + (match_operand 5 "const_int_operand") > + ])]) > + (match_operand:TI 3 "aarch64_ldp_reg_operand" "w"))])] > + "TARGET_FLOAT > + && aarch64_mem_pair_offset (operands[4], TImode) > + && known_eq (INTVAL (operands[5]), INTVAL (operands[4]) + 16) > + && !reg_overlap_mentioned_p (operands[0], operands[2]) > + && !reg_overlap_mentioned_p (operands[0], operands[3])" > + "stp\\t%q2, %q3, [%0, %4]!" > + [(set_attr "type" "neon_stp_q")] > ) > > -(define_insn "storewb_pair<GPF:mode>_<P:mode>" > +;; Store pair with post-index writeback. > +(define_insn "*storewb_post_pair_<ldst_sz>" > [(parallel > - [(set (match_operand:P 0 "register_operand" "=&k") > - (plus:P (match_operand:P 1 "register_operand" "0") > - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) > - (set (mem:GPF (plus:P (match_dup 0) > - (match_dup 4))) > - (match_operand:GPF 2 "register_operand" "w")) > - (set (mem:GPF (plus:P (match_dup 0) > - (match_operand:P 5 "const_int_operand" "n"))) > - (match_operand:GPF 3 "register_operand" "w"))])] > - "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPF:MODE>mode)" > - "stp\\t%<GPF:w>2, %<GPF:w>3, [%0, %4]!" 
> - [(set_attr "type" "neon_store1_2reg<q>")] > -) > - > -(define_insn "storewb_pair<TX:mode>_<P:mode>" > + [(set (match_operand 0 "pmode_register_operand") > + (match_operator 5 "pmode_plus_operator" [ > + (match_operand 1 "pmode_register_operand") > + (match_operand 4 "const_int_operand") > + ])) > + (set (match_operator:GPI 6 "aarch64_mem_pair_operator" [(match_dup 1)]) > + (match_operand 2 "aarch64_stp_reg_operand")) > + (set (match_operator:GPI 7 "aarch64_mem_pair_operator" [ > + (match_operator 8 "pmode_plus_operator" [ > + (match_dup 0) > + (const_int <ldst_sz>) > + ])]) > + (match_operand 3 "aarch64_stp_reg_operand"))])] > + "aarch64_mem_pair_offset (operands[4], <MODE>mode) > + && !reg_overlap_mentioned_p (operands[0], operands[2]) > + && !reg_overlap_mentioned_p (operands[0], operands[3])" > + {@ [cons: =0, 1, 2, 3; attrs: type ] > + [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0], %4 > + [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0], %4 > + } > +) > + > +;; Store pair with post-index writeback. > +(define_insn "*storewb_post_pair_16" > [(parallel > - [(set (match_operand:P 0 "register_operand" "=&k") > - (plus:P (match_operand:P 1 "register_operand" "0") > - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) > - (set (mem:TX (plus:P (match_dup 0) > - (match_dup 4))) > - (match_operand:TX 2 "register_operand" "w")) > - (set (mem:TX (plus:P (match_dup 0) > - (match_operand:P 5 "const_int_operand" "n"))) > - (match_operand:TX 3 "register_operand" "w"))])] > - "TARGET_SIMD > - && INTVAL (operands[5]) > - == INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)" > - "stp\\t%q2, %q3, [%0, %4]!" 
> + [(set (match_operand 0 "pmode_register_operand" "=rk") > + (match_operator 5 "pmode_plus_operator" [ > + (match_operand 1 "pmode_register_operand" "0") > + (match_operand 4 "const_int_operand") > + ])) > + (set (match_operator:TI 6 "aarch64_mem_pair_operator" [(match_dup 1)]) > + (match_operand:TI 2 "aarch64_ldp_reg_operand" "w")) > + (set (match_operator:TI 7 "aarch64_mem_pair_operator" [ > + (match_operator 8 "pmode_plus_operator" [ > + (match_dup 0) > + (const_int 16) > + ])]) > + (match_operand:TI 3 "aarch64_ldp_reg_operand" "w"))])] > + "TARGET_FLOAT > + && aarch64_mem_pair_offset (operands[4], TImode) > + && !reg_overlap_mentioned_p (operands[0], operands[2]) > + && !reg_overlap_mentioned_p (operands[0], operands[3])" > + "stp\t%q2, %q3, [%0], %4" > [(set_attr "type" "neon_stp_q")] > ) > > diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md > index a73724a7fc0..b647e5af7c6 100644 > --- a/gcc/config/aarch64/predicates.md > +++ b/gcc/config/aarch64/predicates.md > @@ -257,11 +257,49 @@ (define_predicate "aarch64_mem_pair_offset" > (and (match_code "const_int") > (match_test "aarch64_offset_7bit_signed_scaled_p (mode, INTVAL (op))"))) > > +(define_special_predicate "aarch64_mem_pair_operator" > + (and > + (match_code "mem") > + (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))") > + (ior > + (match_test "mode == VOIDmode") > + (match_test "known_eq (GET_MODE_SIZE (mode), > + GET_MODE_SIZE (GET_MODE (op)))")))) > + > (define_predicate "aarch64_mem_pair_operand" > (and (match_code "mem") > (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), false, > ADDR_QUERY_LDP_STP)"))) > > +(define_predicate "pmode_plus_operator" > + (and (match_code "plus") > + (match_test "GET_MODE (op) == Pmode"))) > + > +(define_special_predicate "aarch64_ldp_reg_operand" > + (and > + (match_code "reg,subreg") > + (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))") > + (ior > + (match_test "mode == VOIDmode") > + (match_test 
"known_eq (GET_MODE_SIZE (mode), > + GET_MODE_SIZE (GET_MODE (op)))")))) > + > +(define_special_predicate "aarch64_stp_reg_operand" > + (ior (match_operand 0 "aarch64_ldp_reg_operand") > + (and (ior > + (and (match_code "const_int,const,const_vector") > + (match_test "op == CONST0_RTX (GET_MODE (op))")) > + (and (match_code "const_double") > + (match_test "aarch64_float_const_zero_rtx_p (op)"))) > + (ior > + (match_test "GET_MODE (op) == VOIDmode") > + (and > + (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))") > + (ior > + (match_test "mode == VOIDmode") > + (match_test "known_eq (GET_MODE_SIZE (mode), > + GET_MODE_SIZE (GET_MODE (op)))"))))))) > + > ;; Used for storing two 64-bit values in an AdvSIMD register using an STP > ;; as a 128-bit vec_concat. > (define_predicate "aarch64_mem_pair_lanes_operand"
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 36d6c688bc8..e463fd5c817 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1023,6 +1023,7 @@ bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode); bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode); bool aarch64_mem_ok_with_ldpstp_policy_model (rtx, bool, machine_mode); void aarch64_swap_ldrstr_operands (rtx *, bool); +bool aarch64_ldpstp_operand_mode_p (machine_mode); extern void aarch64_asm_output_pool_epilogue (FILE *, const char *, tree, HOST_WIDE_INT); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4820fac67a1..ccf081d2a16 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -8977,23 +8977,15 @@ static rtx aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2, HOST_WIDE_INT adjustment) { - switch (mode) - { - case E_DImode: - return gen_storewb_pairdi_di (base, base, reg, reg2, - GEN_INT (-adjustment), - GEN_INT (UNITS_PER_WORD - adjustment)); - case E_DFmode: - return gen_storewb_pairdf_di (base, base, reg, reg2, - GEN_INT (-adjustment), - GEN_INT (UNITS_PER_WORD - adjustment)); - case E_TFmode: - return gen_storewb_pairtf_di (base, base, reg, reg2, - GEN_INT (-adjustment), - GEN_INT (UNITS_PER_VREG - adjustment)); - default: - gcc_unreachable (); - } + rtx new_base = plus_constant (Pmode, base, -adjustment); + rtx mem = gen_frame_mem (mode, new_base); + rtx mem2 = adjust_address_nv (mem, mode, GET_MODE_SIZE (mode)); + + return gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (3, + gen_rtx_SET (base, new_base), + gen_rtx_SET (mem, reg), + gen_rtx_SET (mem2, reg2))); } /* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the @@ -9025,20 +9017,15 @@ static rtx aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2, HOST_WIDE_INT adjustment) { - switch (mode) - { - case E_DImode: - return 
gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment), - GEN_INT (UNITS_PER_WORD)); - case E_DFmode: - return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment), - GEN_INT (UNITS_PER_WORD)); - case E_TFmode: - return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment), - GEN_INT (UNITS_PER_VREG)); - default: - gcc_unreachable (); - } + rtx mem = gen_frame_mem (mode, base); + rtx mem2 = adjust_address_nv (mem, mode, GET_MODE_SIZE (mode)); + rtx new_base = plus_constant (Pmode, base, adjustment); + + return gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (3, + gen_rtx_SET (base, new_base), + gen_rtx_SET (reg, mem), + gen_rtx_SET (reg2, mem2))); } /* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it @@ -26688,6 +26675,17 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed) return false; } +bool +aarch64_ldpstp_operand_mode_p (machine_mode mode) +{ + if (!targetm.hard_regno_mode_ok (V0_REGNUM, mode) + || hard_regno_nregs (V0_REGNUM, mode) > 1) + return false; + + const auto size = GET_MODE_SIZE (mode); + return known_eq (size, 4) || known_eq (size, 8) || known_eq (size, 16); +} + /* Return true if MEM1 and MEM2 can be combined into a single access of mode MODE, with the combined access having the same address as MEM1. */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 7be1de38b1c..c92a51690c5 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1831,102 +1831,224 @@ (define_insn "store_pair_dw_<TX:mode><TX2:mode>" (set_attr "fp" "yes")] ) +;; Writeback load/store pair patterns. +;; +;; Note that modes in the patterns [SI DI TI] are used only as a proxy for their +;; size; aarch64_ldp_reg_operand and aarch64_mem_pair_operator are special +;; predicates which accept a wide range of operand modes, with the requirement +;; that the contextual (pattern) mode is of the same size as the operand mode. 
+ ;; Load pair with post-index writeback. This is primarily used in function ;; epilogues. -(define_insn "loadwb_pair<GPI:mode>_<P:mode>" +(define_insn "*loadwb_post_pair_<ldst_sz>" [(parallel - [(set (match_operand:P 0 "register_operand" "=k") - (plus:P (match_operand:P 1 "register_operand" "0") - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) - (set (match_operand:GPI 2 "register_operand" "=r") - (mem:GPI (match_dup 1))) - (set (match_operand:GPI 3 "register_operand" "=r") - (mem:GPI (plus:P (match_dup 1) - (match_operand:P 5 "const_int_operand" "n"))))])] - "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)" - "ldp\\t%<GPI:w>2, %<GPI:w>3, [%1], %4" - [(set_attr "type" "load_<GPI:ldpstp_sz>")] -) - -(define_insn "loadwb_pair<GPF:mode>_<P:mode>" + [(set (match_operand 0 "pmode_register_operand") + (match_operator 7 "pmode_plus_operator" [ + (match_operand 1 "pmode_register_operand") + (match_operand 4 "const_int_operand")])) + (set (match_operand:GPI 2 "aarch64_ldp_reg_operand") + (match_operator 5 "memory_operand" [(match_dup 1)])) + (set (match_operand:GPI 3 "aarch64_ldp_reg_operand") + (match_operator 6 "memory_operand" [ + (match_operator 8 "pmode_plus_operator" [ + (match_dup 1) + (const_int <ldst_sz>)])]))])] + "aarch64_mem_pair_offset (operands[4], <MODE>mode) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + {@ [cons: =0, 1, =2, =3; attrs: type] + [ rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%1], %4 + [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%1], %4 + } +) + +;; q-register variant of the above +(define_insn "*loadwb_post_pair_16" + [(parallel + [(set (match_operand 0 "pmode_register_operand" "=rk") + (match_operator 7 "pmode_plus_operator" [ + (match_operand 1 "pmode_register_operand" "0") + (match_operand 4 "const_int_operand")])) + (set (match_operand:TI 2 "aarch64_ldp_reg_operand" "=w") + (match_operator 5 "memory_operand" [(match_dup 1)])) + (set 
(match_operand:TI 3 "aarch64_ldp_reg_operand" "=w") + (match_operator 6 "memory_operand" + [(match_operator 8 "pmode_plus_operator" [ + (match_dup 1) + (const_int 16)])]))])] + "TARGET_FLOAT + && aarch64_mem_pair_offset (operands[4], TImode) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + "ldp\t%q2, %q3, [%1], %4" + [(set_attr "type" "neon_ldp_q")] +) + +;; Load pair with pre-index writeback. +(define_insn "*loadwb_pre_pair_<ldst_sz>" [(parallel - [(set (match_operand:P 0 "register_operand" "=k") - (plus:P (match_operand:P 1 "register_operand" "0") - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) - (set (match_operand:GPF 2 "register_operand" "=w") - (mem:GPF (match_dup 1))) - (set (match_operand:GPF 3 "register_operand" "=w") - (mem:GPF (plus:P (match_dup 1) - (match_operand:P 5 "const_int_operand" "n"))))])] - "INTVAL (operands[5]) == GET_MODE_SIZE (<GPF:MODE>mode)" - "ldp\\t%<GPF:w>2, %<GPF:w>3, [%1], %4" - [(set_attr "type" "neon_load1_2reg")] -) - -(define_insn "loadwb_pair<TX:mode>_<P:mode>" + [(set (match_operand 0 "pmode_register_operand") + (match_operator 8 "pmode_plus_operator" [ + (match_operand 1 "pmode_register_operand") + (match_operand 4 "const_int_operand")])) + (set (match_operand:GPI 2 "aarch64_ldp_reg_operand") + (match_operator 6 "memory_operand" [ + (match_operator 10 "pmode_plus_operator" [ + (match_dup 1) + (match_dup 4) + ])])) + (set (match_operand:GPI 3 "aarch64_ldp_reg_operand") + (match_operator 7 "memory_operand" [ + (match_operator 9 "pmode_plus_operator" [ + (match_dup 1) + (match_operand 5 "const_int_operand") + ])]))])] + "aarch64_mem_pair_offset (operands[4], <MODE>mode) + && known_eq (INTVAL (operands[5]), + INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode)) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + {@ [cons: =&0, 1, =2, =3; attrs: type ] + [ rk, 0, r, r; load_<ldpstp_sz>] 
ldp\t%<w>2, %<w>3, [%0, %4]! + [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%0, %4]! + } +) + +;; q-register variant of the above +(define_insn "*loadwb_pre_pair_16" [(parallel - [(set (match_operand:P 0 "register_operand" "=k") - (plus:P (match_operand:P 1 "register_operand" "0") - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) - (set (match_operand:TX 2 "register_operand" "=w") - (mem:TX (match_dup 1))) - (set (match_operand:TX 3 "register_operand" "=w") - (mem:TX (plus:P (match_dup 1) - (match_operand:P 5 "const_int_operand" "n"))))])] - "TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)" - "ldp\\t%q2, %q3, [%1], %4" + [(set (match_operand 0 "pmode_register_operand" "=&rk") + (match_operator 8 "pmode_plus_operator" [ + (match_operand 1 "pmode_register_operand" "0") + (match_operand 4 "const_int_operand")])) + (set (match_operand:TI 2 "aarch64_ldp_reg_operand" "=w") + (match_operator 6 "memory_operand" [ + (match_operator 10 "pmode_plus_operator" [ + (match_dup 1) + (match_dup 4) + ])])) + (set (match_operand:TI 3 "aarch64_ldp_reg_operand" "=w") + (match_operator 7 "memory_operand" [ + (match_operator 9 "pmode_plus_operator" [ + (match_dup 1) + (match_operand 5 "const_int_operand") + ])]))])] + "TARGET_FLOAT + && aarch64_mem_pair_offset (operands[4], TImode) + && known_eq (INTVAL (operands[5]), INTVAL (operands[4]) + 16) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + "ldp\t%q2, %q3, [%0, %4]!" [(set_attr "type" "neon_ldp_q")] ) ;; Store pair with pre-index writeback. This is primarily used in function ;; prologues. 
-(define_insn "storewb_pair<GPI:mode>_<P:mode>" +(define_insn "*storewb_pre_pair_<ldst_sz>" [(parallel - [(set (match_operand:P 0 "register_operand" "=&k") - (plus:P (match_operand:P 1 "register_operand" "0") - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) - (set (mem:GPI (plus:P (match_dup 0) - (match_dup 4))) - (match_operand:GPI 2 "register_operand" "r")) - (set (mem:GPI (plus:P (match_dup 0) - (match_operand:P 5 "const_int_operand" "n"))) - (match_operand:GPI 3 "register_operand" "r"))])] - "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)" - "stp\\t%<GPI:w>2, %<GPI:w>3, [%0, %4]!" - [(set_attr "type" "store_<GPI:ldpstp_sz>")] + [(set (match_operand 0 "pmode_register_operand") + (match_operator 6 "pmode_plus_operator" [ + (match_operand 1 "pmode_register_operand") + (match_operand 4 "const_int_operand") + ])) + (set (match_operator:GPI 7 "aarch64_mem_pair_operator" [ + (match_operator 8 "pmode_plus_operator" [ + (match_dup 0) + (match_dup 4) + ])]) + (match_operand:GPI 2 "aarch64_stp_reg_operand")) + (set (match_operator:GPI 9 "aarch64_mem_pair_operator" [ + (match_operator 10 "pmode_plus_operator" [ + (match_dup 0) + (match_operand 5 "const_int_operand") + ])]) + (match_operand:GPI 3 "aarch64_stp_reg_operand"))])] + "aarch64_mem_pair_offset (operands[4], <MODE>mode) + && known_eq (INTVAL (operands[5]), + INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode)) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + {@ [cons: =&0, 1, 2, 3; attrs: type ] + [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0, %4]! + [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0, %4]! + } +) + +;; q-register variant of the above. 
+(define_insn "*storewb_pre_pair_16" + [(parallel + [(set (match_operand 0 "pmode_register_operand" "=&rk") + (match_operator 6 "pmode_plus_operator" [ + (match_operand 1 "pmode_register_operand" "0") + (match_operand 4 "const_int_operand") + ])) + (set (match_operator:TI 7 "aarch64_mem_pair_operator" [ + (match_operator 8 "pmode_plus_operator" [ + (match_dup 0) + (match_dup 4) + ])]) + (match_operand:TI 2 "aarch64_ldp_reg_operand" "w")) + (set (match_operator:TI 9 "aarch64_mem_pair_operator" [ + (match_operator 10 "pmode_plus_operator" [ + (match_dup 0) + (match_operand 5 "const_int_operand") + ])]) + (match_operand:TI 3 "aarch64_ldp_reg_operand" "w"))])] + "TARGET_FLOAT + && aarch64_mem_pair_offset (operands[4], TImode) + && known_eq (INTVAL (operands[5]), INTVAL (operands[4]) + 16) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + "stp\\t%q2, %q3, [%0, %4]!" + [(set_attr "type" "neon_stp_q")] ) -(define_insn "storewb_pair<GPF:mode>_<P:mode>" +;; Store pair with post-index writeback. +(define_insn "*storewb_post_pair_<ldst_sz>" [(parallel - [(set (match_operand:P 0 "register_operand" "=&k") - (plus:P (match_operand:P 1 "register_operand" "0") - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) - (set (mem:GPF (plus:P (match_dup 0) - (match_dup 4))) - (match_operand:GPF 2 "register_operand" "w")) - (set (mem:GPF (plus:P (match_dup 0) - (match_operand:P 5 "const_int_operand" "n"))) - (match_operand:GPF 3 "register_operand" "w"))])] - "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPF:MODE>mode)" - "stp\\t%<GPF:w>2, %<GPF:w>3, [%0, %4]!" 
- [(set_attr "type" "neon_store1_2reg<q>")] -) - -(define_insn "storewb_pair<TX:mode>_<P:mode>" + [(set (match_operand 0 "pmode_register_operand") + (match_operator 5 "pmode_plus_operator" [ + (match_operand 1 "pmode_register_operand") + (match_operand 4 "const_int_operand") + ])) + (set (match_operator:GPI 6 "aarch64_mem_pair_operator" [(match_dup 1)]) + (match_operand 2 "aarch64_stp_reg_operand")) + (set (match_operator:GPI 7 "aarch64_mem_pair_operator" [ + (match_operator 8 "pmode_plus_operator" [ + (match_dup 0) + (const_int <ldst_sz>) + ])]) + (match_operand 3 "aarch64_stp_reg_operand"))])] + "aarch64_mem_pair_offset (operands[4], <MODE>mode) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + {@ [cons: =0, 1, 2, 3; attrs: type ] + [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0], %4 + [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0], %4 + } +) + +;; q-register variant of the above. +(define_insn "*storewb_post_pair_16" [(parallel - [(set (match_operand:P 0 "register_operand" "=&k") - (plus:P (match_operand:P 1 "register_operand" "0") - (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) - (set (mem:TX (plus:P (match_dup 0) - (match_dup 4))) - (match_operand:TX 2 "register_operand" "w")) - (set (mem:TX (plus:P (match_dup 0) - (match_operand:P 5 "const_int_operand" "n"))) - (match_operand:TX 3 "register_operand" "w"))])] - "TARGET_SIMD - && INTVAL (operands[5]) - == INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)" - "stp\\t%q2, %q3, [%0, %4]!" 
+ [(set (match_operand 0 "pmode_register_operand" "=rk") + (match_operator 5 "pmode_plus_operator" [ + (match_operand 1 "pmode_register_operand" "0") + (match_operand 4 "const_int_operand") + ])) + (set (match_operator:TI 6 "aarch64_mem_pair_operator" [(match_dup 1)]) + (match_operand:TI 2 "aarch64_ldp_reg_operand" "w")) + (set (match_operator:TI 7 "aarch64_mem_pair_operator" [ + (match_operator 8 "pmode_plus_operator" [ + (match_dup 0) + (const_int 16) + ])]) + (match_operand:TI 3 "aarch64_ldp_reg_operand" "w"))])] + "TARGET_FLOAT + && aarch64_mem_pair_offset (operands[4], TImode) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + "stp\t%q2, %q3, [%0], %4" [(set_attr "type" "neon_stp_q")] ) diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index a73724a7fc0..b647e5af7c6 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -257,11 +257,49 @@ (define_predicate "aarch64_mem_pair_offset" (and (match_code "const_int") (match_test "aarch64_offset_7bit_signed_scaled_p (mode, INTVAL (op))"))) +(define_special_predicate "aarch64_mem_pair_operator" + (and + (match_code "mem") + (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))") + (ior + (match_test "mode == VOIDmode") + (match_test "known_eq (GET_MODE_SIZE (mode), + GET_MODE_SIZE (GET_MODE (op)))")))) + (define_predicate "aarch64_mem_pair_operand" (and (match_code "mem") (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), false, ADDR_QUERY_LDP_STP)"))) +(define_predicate "pmode_plus_operator" + (and (match_code "plus") + (match_test "GET_MODE (op) == Pmode"))) + +(define_special_predicate "aarch64_ldp_reg_operand" + (and + (match_code "reg,subreg") + (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))") + (ior + (match_test "mode == VOIDmode") + (match_test "known_eq (GET_MODE_SIZE (mode), + GET_MODE_SIZE (GET_MODE (op)))")))) + +(define_special_predicate 
"aarch64_stp_reg_operand" + (ior (match_operand 0 "aarch64_ldp_reg_operand") + (and (ior + (and (match_code "const_int,const,const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))")) + (and (match_code "const_double") + (match_test "aarch64_float_const_zero_rtx_p (op)"))) + (ior + (match_test "GET_MODE (op) == VOIDmode") + (and + (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))") + (ior + (match_test "mode == VOIDmode") + (match_test "known_eq (GET_MODE_SIZE (mode), + GET_MODE_SIZE (GET_MODE (op)))"))))))) + ;; Used for storing two 64-bit values in an AdvSIMD register using an STP ;; as a 128-bit vec_concat. (define_predicate "aarch64_mem_pair_lanes_operand"