@@ -138,6 +138,210 @@ struct alt_base
poly_int64 offset;
};
+// Virtual base class for load/store walkers used in alias analysis.
+struct alias_walker
+{
+ virtual bool conflict_p (int &budget) const = 0;
+ virtual insn_info *insn () const = 0;
+ virtual bool valid () const = 0;
+ virtual void advance () = 0;
+};
+
+// The kinds of writeback opportunity that handle_writeback_opportunities
+// can be asked about; used to parameterize that hook.
+enum class writeback {
+  // All writeback opportunities, including folding a trailing update
+  // of the base register into a non-writeback pair.
+  ALL,
+
+  // Only those opportunities involving accesses that already use
+  // writeback addressing.
+  EXISTING
+};
+
+struct pair_fusion {
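+  // Set up the dominance, dataflow and RTL SSA information used by
+  // the pass; the destructor below tears this down again.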
+ pair_fusion ()
+ {
+ calculate_dominance_info (CDI_DOMINATORS);
+ df_analyze ();
+ crtl->ssa = new rtl_ssa::function_info (cfun);
+  }
+
+ // Given:
+ // - an rtx REG_OP, the non-memory operand in a load/store insn,
+ // - a machine_mode MEM_MODE, the mode of the MEM in that insn, and
+ // - a boolean LOAD_P (true iff the insn is a load), then:
+ // return true if the access should be considered an FP/SIMD access.
+ // Such accesses are segregated from GPR accesses, since we only want
+ // to form pairs for accesses that use the same register file.
+ virtual bool fpsimd_op_p (rtx, machine_mode, bool)
+ {
+ return false;
+ }
+
+ // Return true if we should consider forming ldp/stp insns from memory
+ // accesses with operand mode MODE at this stage in compilation.
+ virtual bool pair_operand_mode_ok_p (machine_mode mode) = 0;
+
+ // Return true iff REG_OP is a suitable register operand for a paired
+ // memory access, where LOAD_P is true if we're asking about loads and
+ // false for stores. MODE gives the mode of the operand.
+ virtual bool pair_reg_operand_ok_p (bool load_p, rtx reg_op,
+ machine_mode mode) = 0;
+
+  // Return the alias check limit.  This is needed to avoid unbounded
+  // quadratic behaviour when performing alias analysis.
+ virtual int pair_mem_alias_check_limit () = 0;
+
+  // Return true if we should try to handle writeback opportunities of
+  // the kind given by WHICH (see enum writeback above).
+  virtual bool handle_writeback_opportunities (enum writeback which) = 0;
+
+ // Given BASE_MEM, the mem from the lower candidate access for a pair,
+ // and LOAD_P (true if the access is a load), check if we should proceed
+ // to form the pair given the target's code generation policy on
+ // paired accesses.
+ virtual bool pair_mem_ok_with_policy (rtx base_mem, bool load_p) = 0;
+
+ // Generate the pattern for a paired access. PATS gives the patterns
+ // for the individual memory accesses (which by this point must share a
+ // common base register). If WRITEBACK is non-NULL, then this rtx
+ // describes the update to the base register that should be performed by
+ // the resulting insn. LOAD_P is true iff the accesses are loads.
+ virtual rtx gen_pair (rtx *pats, rtx writeback, bool load_p) = 0;
+
+  // Return true if INSN is a paired memory access; if so, set LOAD_P
+  // to true iff the insn is a load pair.
+  virtual bool pair_mem_insn_p (rtx_insn *insn, bool &load_p) = 0;
+
+ // Return true if we should track loads.
+ virtual bool track_loads_p ()
+ {
+ return true;
+ }
+
+ // Return true if we should track stores.
+ virtual bool track_stores_p ()
+ {
+ return true;
+ }
+
+  // Return true if the offset OFF is in range for a paired memory access.
+ virtual bool pair_mem_in_range_p (HOST_WIDE_INT off) = 0;
+
+ // Given a load/store pair insn in PATTERN, unpack the insn, storing
+ // the register operands in REGS, and returning the mem. LOAD_P is true
+ // for load and false for store.
+ virtual rtx destructure_pair (rtx regs[2], rtx pattern, bool load_p) = 0;
+
+ // Given a pair mem in PAIR_MEM, register operands in REGS, and an rtx
+ // representing the effect of writeback on the base register in WB_EFFECT,
+ // return an insn representing a writeback variant of this pair.
+ // LOAD_P is true iff the pair is a load.
+ // This is used when promoting existing non-writeback pairs to writeback
+ // variants.
+  virtual rtx gen_promote_writeback_pair (rtx wb_effect, rtx pair_mem,
+                                          rtx regs[2], bool load_p) = 0;
+
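+  // The member functions below form the target-independent driver of
+  // the pass; they are documented above their definitions.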
+ void ldp_fusion_bb (bb_info *bb);
+ inline insn_info *find_trailing_add (insn_info *insns[2],
+ const insn_range_info &pair_range,
+ int initial_writeback,
+ rtx *writeback_effect,
+ def_info **add_def,
+ def_info *base_def,
+ poly_int64 initial_offset,
+ unsigned access_size);
+ inline int get_viable_bases (insn_info *insns[2],
+ vec<base_cand> &base_cands,
+ rtx cand_mems[2],
+ unsigned access_size,
+ bool reversed);
+ inline void do_alias_analysis (insn_info *alias_hazards[4],
+ alias_walker *walkers[4],
+ bool load_p);
+ inline void try_promote_writeback (insn_info *insn, bool load_p);
+ inline void run ();
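+
+  // Tear down the infrastructure set up by the constructor, first
+  // committing any pending RTL SSA changes to the IR.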
+  ~pair_fusion ()
+ {
+ if (crtl->ssa->perform_pending_updates ())
+ cleanup_cfg (0);
+
+ free_dominance_info (CDI_DOMINATORS);
+
+ delete crtl->ssa;
+ crtl->ssa = nullptr;
+ }
+};
+
+// Main entry point for the pass: if the target wants us to track
+// neither loads nor stores there is nothing to do; otherwise, try to
+// form pairs in each basic block of the function.
+void
+pair_fusion::run ()
+{
+ if (!track_loads_p () && !track_stores_p ())
+ return;
+
+ for (auto bb : crtl->ssa->bbs ())
+ ldp_fusion_bb (bb);
+}
+
+struct aarch64_pair_fusion : public pair_fusion
+{
+  // Before RA, we use the modes of the register operands to decide
+  // whether an access is an FP/SIMD access, noting that stores of
+  // constant zero operands use GPRs (even in non-integer modes).
+  // After RA, we use the hard register numbers.
+ bool fpsimd_op_p (rtx reg_op, machine_mode mem_mode,
+ bool load_p) override final
+ {
+ return reload_completed
+ ? (REG_P (reg_op) && FP_REGNUM_P (REGNO (reg_op)))
+ : (GET_MODE_CLASS (mem_mode) != MODE_INT
+ && (load_p || !aarch64_const_zero_rtx_p (reg_op)));
+ }
+
+ bool pair_mem_insn_p (rtx_insn *rti, bool &load_p) override final;
+ bool pair_mem_ok_with_policy (rtx base_mem, bool load_p) override final
+ {
+ return aarch64_mem_ok_with_ldpstp_policy_model (base_mem,
+ load_p,
+ GET_MODE (base_mem));
+ }
+  bool pair_operand_mode_ok_p (machine_mode mode) override final;
+  rtx gen_pair (rtx *pats, rtx writeback, bool load_p) override final;
+  bool pair_reg_operand_ok_p (bool load_p, rtx reg_op,
+                              machine_mode mode) override final
+ {
+ return (load_p
+ ? aarch64_ldp_reg_operand (reg_op, mode)
+ : aarch64_stp_reg_operand (reg_op, mode));
+ }
+  int pair_mem_alias_check_limit () override final
+ {
+ return aarch64_ldp_alias_check_limit;
+ }
+  bool handle_writeback_opportunities (enum writeback which) override final
+ {
+ if (which == writeback::ALL)
+ return aarch64_ldp_writeback > 1;
+ else
+ return aarch64_ldp_writeback;
+ }
+  bool track_loads_p () override final
+ {
+ return aarch64_tune_params.ldp_policy_model
+ != AARCH64_LDP_STP_POLICY_NEVER;
+ }
+  bool track_stores_p () override final
+ {
+ return aarch64_tune_params.stp_policy_model
+ != AARCH64_LDP_STP_POLICY_NEVER;
+ }
+  bool pair_mem_in_range_p (HOST_WIDE_INT off) override final
+ {
+ return (off >= LDP_MIN_IMM && off <= LDP_MAX_IMM);
+ }
+  rtx gen_promote_writeback_pair (rtx wb_effect, rtx pair_mem, rtx regs[2],
+                                  bool load_p) override final;
+  rtx destructure_pair (rtx regs[2], rtx pattern, bool load_p) override final;
+};
+
// State used by the pass for a given basic block.
struct ldp_bb_info
{
@@ -159,9 +363,9 @@ struct ldp_bb_info
hash_map<def_hash, alt_base> canon_base_map;
static const size_t obstack_alignment = sizeof (void *);
- bb_info *m_bb;
- ldp_bb_info (bb_info *bb) : m_bb (bb), m_emitted_tombstone (false)
+  ldp_bb_info (bb_info *bb, pair_fusion *pass)
+    : m_bb (bb), m_pass (pass), m_emitted_tombstone (false)
{
obstack_specify_allocation (&m_obstack, OBSTACK_CHUNK_SIZE,
obstack_alignment, obstack_chunk_alloc,
@@ -181,37 +385,75 @@ struct ldp_bb_info
inline void track_access (insn_info *, bool load, rtx mem);
inline void transform ();
inline void cleanup_tombstones ();
+ inline void merge_pairs (insn_list_t &, insn_list_t &,
+ bool load_p, unsigned access_size);
+ inline void transform_for_base (int load_size, access_group &group);
+
+ inline bool try_fuse_pair (bool load_p, unsigned access_size,
+ insn_info *i1, insn_info *i2);
+
+ inline bool fuse_pair (bool load_p, unsigned access_size,
+ int writeback,
+ insn_info *i1, insn_info *i2,
+ base_cand &base,
+ const insn_range_info &move_range);
+
+ inline void track_tombstone (int uid);
private:
obstack m_obstack;
-
+ bb_info *m_bb;
+ pair_fusion *m_pass;
// State for keeping track of tombstone insns emitted for this BB.
bitmap_obstack m_bitmap_obstack;
bitmap_head m_tombstone_bitmap;
bool m_emitted_tombstone;
-
+ template<typename Map> inline void traverse_base_map (Map &map);
+ inline bool track_via_mem_expr (insn_info *, rtx mem, lfs_fields lfs);
inline splay_tree_node<access_record *> *node_alloc (access_record *);
+};
- template<typename Map>
- inline void traverse_base_map (Map &map);
- inline void transform_for_base (int load_size, access_group &group);
-
- inline void merge_pairs (insn_list_t &, insn_list_t &,
- bool load_p, unsigned access_size);
-
- inline bool try_fuse_pair (bool load_p, unsigned access_size,
- insn_info *i1, insn_info *i2);
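+// Implement the pair_mem_insn_p hook: return true if RTI is a load
+// or store pair; if so, set LOAD_P to true iff it is a load pair.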
+bool
+aarch64_pair_fusion::pair_mem_insn_p (rtx_insn *rti,
+ bool &load_p)
+{
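+  // Pair insns are represented as a PARALLEL of two sets; consult the
+  // ldpstp attribute to distinguish load pairs from store pairs.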
+ rtx pat = PATTERN (rti);
+ if (GET_CODE (pat) == PARALLEL
+ && XVECLEN (pat, 0) == 2)
+ {
+ const auto attr = get_attr_ldpstp (rti);
+ if (attr == LDPSTP_NONE)
+ return false;
- inline bool fuse_pair (bool load_p, unsigned access_size,
- int writeback,
- insn_info *i1, insn_info *i2,
- base_cand &base,
- const insn_range_info &move_range);
+ load_p = (attr == LDPSTP_LDP);
+ gcc_checking_assert (load_p || attr == LDPSTP_STP);
+ return true;
+ }
+ return false;
+}
- inline void track_tombstone (int uid);
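+// Implement the gen_pair hook; the interface is described in the
+// comment above the declaration in pair_fusion.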
+rtx
+aarch64_pair_fusion::gen_pair (rtx *pats,
+ rtx writeback,
+ bool load_p)
+{
- inline bool track_via_mem_expr (insn_info *, rtx mem, lfs_fields lfs);
-};
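+  // A writeback pair is represented as a PARALLEL of the base register
+  // update and the two constituent accesses; otherwise we can defer to
+  // the existing pair generators.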
+ if (writeback)
+ {
+ auto patvec = gen_rtvec (3, writeback, pats[0], pats[1]);
+ return gen_rtx_PARALLEL (VOIDmode, patvec);
+ }
+ else if (load_p)
+ return aarch64_gen_load_pair (XEXP (pats[0], 0),
+ XEXP (pats[1], 0),
+ XEXP (pats[0], 1));
+ else
+ return aarch64_gen_store_pair (XEXP (pats[0], 0),
+ XEXP (pats[0], 1),
+ XEXP (pats[1], 1));
+}
splay_tree_node<access_record *> *
ldp_bb_info::node_alloc (access_record *access)
@@ -312,8 +554,8 @@ any_post_modify_p (rtx x)
// Return true if we should consider forming ldp/stp insns from memory
// accesses with operand mode MODE at this stage in compilation.
-static bool
-ldp_operand_mode_ok_p (machine_mode mode)
+bool
+aarch64_pair_fusion::pair_operand_mode_ok_p (machine_mode mode)
{
if (!aarch64_ldpstp_operand_mode_p (mode))
return false;
@@ -404,9 +646,10 @@ ldp_bb_info::track_via_mem_expr (insn_info *insn, rtx mem, lfs_fields lfs)
const machine_mode mem_mode = GET_MODE (mem);
const HOST_WIDE_INT mem_size = GET_MODE_SIZE (mem_mode).to_constant ();
- // Punt on misaligned offsets. LDP/STP instructions require offsets to be a
- // multiple of the access size, and we believe that misaligned offsets on
- // MEM_EXPR bases are likely to lead to misaligned offsets w.r.t. RTL bases.
+ // Punt on misaligned offsets. Paired memory access instructions require
+ // offsets to be a multiple of the access size, and we believe that
+ // misaligned offsets on MEM_EXPR bases are likely to lead to misaligned
+ // offsets w.r.t. RTL bases.
if (!multiple_p (offset, mem_size))
return false;
@@ -430,10 +673,10 @@ ldp_bb_info::track_via_mem_expr (insn_info *insn, rtx mem, lfs_fields lfs)
}
// Main function to begin pair discovery. Given a memory access INSN,
-// determine whether it could be a candidate for fusing into an ldp/stp,
-// and if so, track it in the appropriate data structure for this basic
-// block. LOAD_P is true if the access is a load, and MEM is the mem
-// rtx that occurs in INSN.
+// determine whether it could be a candidate for fusing into a paired
+// access, and if so, track it in the appropriate data structure for
+// this basic block. LOAD_P is true if the access is a load, and MEM
+// is the mem rtx that occurs in INSN.
void
ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
{
@@ -441,35 +684,24 @@ ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
if (MEM_VOLATILE_P (mem))
return;
- // Ignore writeback accesses if the param says to do so.
- if (!aarch64_ldp_writeback
+ // Ignore writeback accesses if the hook says to do so.
+ if (!m_pass->handle_writeback_opportunities (writeback::EXISTING)
&& GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
return;
const machine_mode mem_mode = GET_MODE (mem);
- if (!ldp_operand_mode_ok_p (mem_mode))
+ if (!m_pass->pair_operand_mode_ok_p (mem_mode))
return;
rtx reg_op = XEXP (PATTERN (insn->rtl ()), !load_p);
- // Ignore the access if the register operand isn't suitable for ldp/stp.
- if (load_p
- ? !aarch64_ldp_reg_operand (reg_op, mem_mode)
- : !aarch64_stp_reg_operand (reg_op, mem_mode))
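+  // Ignore the access if the register operand isn't suitable for a
+  // paired access.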
+ if (!m_pass->pair_reg_operand_ok_p (load_p, reg_op, mem_mode))
return;
// We want to segregate FP/SIMD accesses from GPR accesses.
- //
- // Before RA, we use the modes, noting that stores of constant zero
- // operands use GPRs (even in non-integer modes). After RA, we use
- // the hard register numbers.
- const bool fpsimd_op_p
- = reload_completed
- ? (REG_P (reg_op) && FP_REGNUM_P (REGNO (reg_op)))
- : (GET_MODE_CLASS (mem_mode) != MODE_INT
- && (load_p || !aarch64_const_zero_rtx_p (reg_op)));
+ const bool fpsimd_op_p = m_pass->fpsimd_op_p (reg_op, mem_mode, load_p);
- // Note ldp_operand_mode_ok_p already rejected VL modes.
+ // Note pair_operand_mode_ok_p already rejected VL modes.
const HOST_WIDE_INT mem_size = GET_MODE_SIZE (mem_mode).to_constant ();
const lfs_fields lfs = { load_p, fpsimd_op_p, mem_size };
@@ -498,8 +730,8 @@ ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
// elimination offset pre-RA, we should postpone forming pairs on such
// accesses until after RA.
//
- // As it stands, addresses with offsets in range for LDR but not
- // in range for LDP/STP are currently reloaded inefficiently,
+ // As it stands, addresses in range for an individual load/store but not
+ // for a paired access are currently reloaded inefficiently,
// ending up with a separate base register for each pair.
//
// In theory LRA should make use of
@@ -510,11 +742,11 @@ ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
// constraint; this early return means we never get to the code
// that calls targetm.legitimize_address_displacement.
//
- // So for now, it's better to punt when we can't be sure that the
- // offset is in range for LDP/STP. Out-of-range cases can then be
- // handled after RA by the out-of-range LDP/STP peepholes. Eventually, it
- // would be nice to handle known out-of-range opportunities in the
- // pass itself (for stack accesses, this would be in the post-RA pass).
+  // So for now, it's better to punt when we can't be sure that the
+  // offset is in range for a paired access.  On aarch64, out-of-range
+  // cases can then be handled after RA by the out-of-range ldp/stp
+  // peepholes.  Eventually, it would be nice to handle known
+  // out-of-range opportunities in the pass itself (for stack accesses,
+  // this would be in the post-RA pass).
if (!reload_completed
&& (REGNO (base) == FRAME_POINTER_REGNUM
|| REGNO (base) == ARG_POINTER_REGNUM))
@@ -565,8 +797,8 @@ ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
gcc_unreachable (); // Base defs should be unique.
}
- // Punt on misaligned offsets. LDP/STP require offsets to be a multiple of
- // the access size.
+ // Punt on misaligned offsets. Paired memory accesses require offsets
+ // to be a multiple of the access size.
if (!multiple_p (mem_off, mem_size))
return;
@@ -1199,8 +1431,8 @@ extract_writebacks (bool load_p, rtx pats[2], int changed)
// base register. If there is one, we choose the first such update after
// PAIR_DST that is still in the same BB as our pair. We return the new def in
// *ADD_DEF and the resulting writeback effect in *WRITEBACK_EFFECT.
-static insn_info *
-find_trailing_add (insn_info *insns[2],
+insn_info *
+pair_fusion::find_trailing_add (insn_info *insns[2],
const insn_range_info &pair_range,
int initial_writeback,
rtx *writeback_effect,
@@ -1278,7 +1510,7 @@ find_trailing_add (insn_info *insns[2],
off_hwi /= access_size;
- if (off_hwi < LDP_MIN_IMM || off_hwi > LDP_MAX_IMM)
+ if (!pair_mem_in_range_p (off_hwi))
return nullptr;
auto dump_prefix = [&]()
@@ -1792,7 +2024,7 @@ ldp_bb_info::fuse_pair (bool load_p,
{
if (dump_file)
fprintf (dump_file,
- " ldp: i%d has wb but subsequent i%d has non-wb "
+ " load pair: i%d has wb but subsequent i%d has non-wb "
"update of base (r%d), dropping wb\n",
insns[0]->uid (), insns[1]->uid (), base_regno);
gcc_assert (writeback_effect);
@@ -1815,7 +2047,7 @@ ldp_bb_info::fuse_pair (bool load_p,
}
// If either of the original insns had writeback, but the resulting pair insn
- // does not (can happen e.g. in the ldp edge case above, or if the writeback
+  // does not (can happen e.g. in the edge case above, or if the writeback
// effects cancel out), then drop the def(s) of the base register as
// appropriate.
//
@@ -1834,7 +2066,7 @@ ldp_bb_info::fuse_pair (bool load_p,
// update of the base register and try and fold it in to make this into a
// writeback pair.
insn_info *trailing_add = nullptr;
- if (aarch64_ldp_writeback > 1
+ if (m_pass->handle_writeback_opportunities (writeback::ALL)
&& !writeback_effect
&& (!load_p || (!refers_to_regno_p (base_regno, base_regno + 1,
XEXP (pats[0], 0), nullptr)
@@ -1842,7 +2074,7 @@ ldp_bb_info::fuse_pair (bool load_p,
XEXP (pats[1], 0), nullptr))))
{
def_info *add_def;
-      trailing_add = find_trailing_add (insns, move_range, writeback,
-					&writeback_effect,
-					&add_def, base.def, offsets[0],
-					access_size);
+      trailing_add
+	= m_pass->find_trailing_add (insns, move_range, writeback,
+				     &writeback_effect,
+				     &add_def, base.def, offsets[0],
+				     access_size);
@@ -1855,35 +2087,19 @@ ldp_bb_info::fuse_pair (bool load_p,
}
// Now that we know what base mem we're going to use, check if it's OK
- // with the ldp/stp policy.
+ // with the pair mem policy.
rtx first_mem = XEXP (pats[0], load_p);
- if (!aarch64_mem_ok_with_ldpstp_policy_model (first_mem,
- load_p,
- GET_MODE (first_mem)))
+ if (!m_pass->pair_mem_ok_with_policy (first_mem, load_p))
{
if (dump_file)
- fprintf (dump_file, "punting on pair (%d,%d), ldp/stp policy says no\n",
+ fprintf (dump_file, "punting on pair (%d,%d), pair mem policy says no\n",
i1->uid (), i2->uid ());
return false;
}
rtx reg_notes = combine_reg_notes (first, second, load_p);
- rtx pair_pat;
- if (writeback_effect)
- {
- auto patvec = gen_rtvec (3, writeback_effect, pats[0], pats[1]);
- pair_pat = gen_rtx_PARALLEL (VOIDmode, patvec);
- }
- else if (load_p)
- pair_pat = aarch64_gen_load_pair (XEXP (pats[0], 0),
- XEXP (pats[1], 0),
- XEXP (pats[0], 1));
- else
- pair_pat = aarch64_gen_store_pair (XEXP (pats[0], 0),
- XEXP (pats[0], 1),
- XEXP (pats[1], 1));
-
+ rtx pair_pat = m_pass->gen_pair (pats, writeback_effect, load_p);
insn_change *pair_change = nullptr;
auto set_pair_pat = [pair_pat,reg_notes](insn_change *change) {
rtx_insn *rti = change->insn ()->rtl ();
@@ -2125,15 +2341,6 @@ load_modified_by_store_p (insn_info *load,
return false;
}
-// Virtual base class for load/store walkers used in alias analysis.
-struct alias_walker
-{
- virtual bool conflict_p (int &budget) const = 0;
- virtual insn_info *insn () const = 0;
- virtual bool valid () const = 0;
- virtual void advance () = 0;
-};
-
// Implement some common functionality used by both store_walker
// and load_walker.
template<bool reverse>
@@ -2251,13 +2458,13 @@ public:
//
// We try to maintain the invariant that if a walker becomes invalid, we
// set its pointer to null.
-static void
-do_alias_analysis (insn_info *alias_hazards[4],
+void
+pair_fusion::do_alias_analysis (insn_info *alias_hazards[4],
alias_walker *walkers[4],
bool load_p)
{
const int n_walkers = 2 + (2 * !load_p);
- int budget = aarch64_ldp_alias_check_limit;
+ int budget = pair_mem_alias_check_limit ();
auto next_walker = [walkers,n_walkers](int current) -> int {
for (int j = 1; j <= n_walkers; j++)
@@ -2342,8 +2549,8 @@ do_alias_analysis (insn_info *alias_hazards[4],
//
// Returns an integer where bit (1 << i) is set if INSNS[i] uses writeback
// addressing.
-static int
-get_viable_bases (insn_info *insns[2],
+int
+pair_fusion::get_viable_bases (insn_info *insns[2],
vec<base_cand> &base_cands,
rtx cand_mems[2],
unsigned access_size,
@@ -2389,7 +2596,7 @@ get_viable_bases (insn_info *insns[2],
if (!is_lower)
base_off--;
- if (base_off < LDP_MIN_IMM || base_off > LDP_MAX_IMM)
+ if (!pair_mem_in_range_p (base_off))
continue;
use_info *use = find_access (insns[i]->uses (), REGNO (base));
@@ -2446,7 +2653,7 @@ get_viable_bases (insn_info *insns[2],
}
// Given two adjacent memory accesses of the same size, I1 and I2, try
-// and see if we can merge them into a ldp or stp.
+// and see if we can merge them into a paired access.
//
// ACCESS_SIZE gives the (common) size of a single access, LOAD_P is true
// if the accesses are both loads, otherwise they are both stores.
@@ -2486,7 +2693,7 @@ ldp_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
{
if (dump_file)
fprintf (dump_file,
- "punting on ldp due to reg conflcits (%d,%d)\n",
+ "punting on pair mem load due to reg conflcits (%d,%d)\n",
insns[0]->uid (), insns[1]->uid ());
return false;
}
@@ -2504,7 +2711,7 @@ ldp_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
auto_vec<base_cand, 2> base_cands (2);
-  int writeback = get_viable_bases (insns, base_cands, cand_mems,
-				    access_size, reversed);
+  int writeback = m_pass->get_viable_bases (insns, base_cands, cand_mems,
+					    access_size, reversed);
if (base_cands.is_empty ())
{
@@ -2633,7 +2840,7 @@ ldp_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
walkers[1] = &backward_store_walker;
if (load_p && (mem_defs[0] || mem_defs[1]))
- do_alias_analysis (alias_hazards, walkers, load_p);
+ m_pass->do_alias_analysis (alias_hazards, walkers, load_p);
else
{
// We want to find any loads hanging off the first store.
@@ -2642,7 +2849,7 @@ ldp_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
load_walker<true> backward_load_walker (mem_defs[1], insns[1], insns[0]);
walkers[2] = &forward_load_walker;
walkers[3] = &backward_load_walker;
- do_alias_analysis (alias_hazards, walkers, load_p);
+ m_pass->do_alias_analysis (alias_hazards, walkers, load_p);
// Now consolidate hazards back down.
if (alias_hazards[2]
&& (!alias_hazards[0] || (*alias_hazards[2] < *alias_hazards[0])))
@@ -2956,26 +3163,6 @@ ldp_bb_info::transform ()
traverse_base_map (def_map);
}
-static void
-ldp_fusion_init ()
-{
- calculate_dominance_info (CDI_DOMINATORS);
- df_analyze ();
- crtl->ssa = new rtl_ssa::function_info (cfun);
-}
-
-static void
-ldp_fusion_destroy ()
-{
- if (crtl->ssa->perform_pending_updates ())
- cleanup_cfg (0);
-
- free_dominance_info (CDI_DOMINATORS);
-
- delete crtl->ssa;
- crtl->ssa = nullptr;
-}
-
// Given a load pair insn in PATTERN, unpack the insn, storing
// the registers in REGS and returning the mem.
static rtx
@@ -3015,16 +3202,19 @@ aarch64_destructure_store_pair (rtx regs[2], rtx pattern)
return mem;
}
-// Given a pair mem in PAIR_MEM, register operands in REGS, and an rtx
-// representing the effect of writeback on the base register in WB_EFFECT,
-// return an insn representing a writeback variant of this pair.
-// LOAD_P is true iff the pair is a load.
-//
-// This is used when promoting existing non-writeback pairs to writeback
-// variants.
-static rtx
-aarch64_gen_writeback_pair (rtx wb_effect, rtx pair_mem, rtx regs[2],
- bool load_p)
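+// Implement the destructure_pair hook: unpack the pair insn PATTERN
+// into its register operands REGS and return the mem, dispatching on
+// LOAD_P.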
+rtx
+aarch64_pair_fusion::destructure_pair (rtx regs[2], rtx pattern, bool load_p)
+{
+  if (load_p)
+    return aarch64_destructure_load_pair (regs, pattern);
+  else
+    return aarch64_destructure_store_pair (regs, pattern);
+}
+
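+// Implement the gen_promote_writeback_pair hook; the interface is
+// described in the comment above the declaration in pair_fusion.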
+rtx
+aarch64_pair_fusion::gen_promote_writeback_pair (rtx wb_effect, rtx pair_mem,
+ rtx regs[2],
+ bool load_p)
{
auto op_mode = aarch64_operand_mode_for_pair_mode (GET_MODE (pair_mem));
@@ -3059,23 +3249,13 @@ aarch64_gen_writeback_pair (rtx wb_effect, rtx pair_mem, rtx regs[2],
// Given an existing pair insn INSN, look for a trailing update of
// the base register which we can fold in to make this pair use
// a writeback addressing mode.
-static void
-try_promote_writeback (insn_info *insn)
+void
+pair_fusion::try_promote_writeback (insn_info *insn, bool load_p)
{
- auto rti = insn->rtl ();
- const auto attr = get_attr_ldpstp (rti);
- if (attr == LDPSTP_NONE)
- return;
-
- bool load_p = (attr == LDPSTP_LDP);
- gcc_checking_assert (load_p || attr == LDPSTP_STP);
-
rtx regs[2];
rtx mem = NULL_RTX;
- if (load_p)
- mem = aarch64_destructure_load_pair (regs, PATTERN (rti));
- else
- mem = aarch64_destructure_store_pair (regs, PATTERN (rti));
+
+ mem = destructure_pair (regs, PATTERN (insn->rtl ()), load_p);
gcc_checking_assert (MEM_P (mem));
poly_int64 offset;
@@ -3112,9 +3292,10 @@ try_promote_writeback (insn_info *insn)
def_info *add_def;
const insn_range_info pair_range (insn);
insn_info *insns[2] = { nullptr, insn };
- insn_info *trailing_add = find_trailing_add (insns, pair_range, 0, &wb_effect,
- &add_def, base_def, offset,
- access_size);
+ insn_info *trailing_add
+ = find_trailing_add (insns, pair_range, 0, &wb_effect,
+ &add_def, base_def, offset,
+ access_size);
if (!trailing_add)
return;
@@ -3124,8 +3305,10 @@ try_promote_writeback (insn_info *insn)
insn_change del_change (trailing_add, insn_change::DELETE);
insn_change *changes[] = { &pair_change, &del_change };
- rtx pair_pat = aarch64_gen_writeback_pair (wb_effect, mem, regs, load_p);
- validate_unshare_change (rti, &PATTERN (rti), pair_pat, true);
+  rtx pair_pat = gen_promote_writeback_pair (wb_effect, mem, regs, load_p);
+  validate_unshare_change (insn->rtl (), &PATTERN (insn->rtl ()), pair_pat,
+			   true);
// The pair must gain the def of the base register from the add.
pair_change.new_defs = insert_access (attempt,
@@ -3159,14 +3342,12 @@ try_promote_writeback (insn_info *insn)
// for load/store candidates. If running after RA, also try and promote
// non-writeback pairs to use writeback addressing. Then try to fuse
// candidates into pairs.
-void ldp_fusion_bb (bb_info *bb)
+void pair_fusion::ldp_fusion_bb (bb_info *bb)
{
- const bool track_loads
- = aarch64_tune_params.ldp_policy_model != AARCH64_LDP_STP_POLICY_NEVER;
- const bool track_stores
- = aarch64_tune_params.stp_policy_model != AARCH64_LDP_STP_POLICY_NEVER;
+ const bool track_loads = track_loads_p ();
+ const bool track_stores = track_stores_p ();
- ldp_bb_info bb_state (bb);
+ ldp_bb_info bb_state (bb, this);
for (auto insn : bb->nondebug_insns ())
{
@@ -3176,11 +3357,11 @@ void ldp_fusion_bb (bb_info *bb)
continue;
rtx pat = PATTERN (rti);
+ bool load_p;
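+      // After RA, try to promote existing non-writeback pairs to use
+      // writeback addressing, if the target wants this.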
if (reload_completed
- && aarch64_ldp_writeback > 1
- && GET_CODE (pat) == PARALLEL
- && XVECLEN (pat, 0) == 2)
- try_promote_writeback (insn);
+ && handle_writeback_opportunities (writeback::ALL)
+ && pair_mem_insn_p (rti, load_p))
+ try_promote_writeback (insn, load_p);
if (GET_CODE (pat) != SET)
continue;
@@ -3195,16 +3376,6 @@ void ldp_fusion_bb (bb_info *bb)
bb_state.cleanup_tombstones ();
}
-void ldp_fusion ()
-{
- ldp_fusion_init ();
-
- for (auto bb : crtl->ssa->bbs ())
- ldp_fusion_bb (bb);
-
- ldp_fusion_destroy ();
-}
-
namespace {
const pass_data pass_data_ldp_fusion =
@@ -3234,14 +3405,6 @@ public:
if (!optimize || optimize_debug)
return false;
- // If the tuning policy says never to form ldps or stps, don't run
- // the pass.
- if ((aarch64_tune_params.ldp_policy_model
- == AARCH64_LDP_STP_POLICY_NEVER)
- && (aarch64_tune_params.stp_policy_model
- == AARCH64_LDP_STP_POLICY_NEVER))
- return false;
-
if (reload_completed)
return flag_aarch64_late_ldp_fusion;
else
@@ -3250,7 +3413,8 @@ public:
unsigned execute (function *) final override
{
- ldp_fusion ();
+ aarch64_pair_fusion pass;
+ pass.run ();
return 0;
}
};