===================================================================
@@ -2514,20 +2514,9 @@
"lvxl %0,%y1"
[(set_attr "type" "vecload")])
-(define_expand "altivec_lvx_<mode>"
- [(parallel
- [(set (match_operand:VM2 0 "register_operand" "=v")
- (match_operand:VM2 1 "memory_operand" "Z"))
- (unspec [(const_int 0)] UNSPEC_LVX)])]
- "TARGET_ALTIVEC"
-{
- if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
- {
- altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_LVX);
- DONE;
- }
-})
-
+; This version of lvx is used only in cases where we need to force an lvx
+; over any other load, and we don't care about losing CSE opportunities.
+; Its primary use is for prologue register saves.
(define_insn "altivec_lvx_<mode>_internal"
[(parallel
[(set (match_operand:VM2 0 "register_operand" "=v")
@@ -2537,20 +2526,45 @@
"lvx %0,%y1"
[(set_attr "type" "vecload")])
-(define_expand "altivec_stvx_<mode>"
- [(parallel
- [(set (match_operand:VM2 0 "memory_operand" "=Z")
- (match_operand:VM2 1 "register_operand" "v"))
- (unspec [(const_int 0)] UNSPEC_STVX)])]
- "TARGET_ALTIVEC"
-{
- if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
- {
- altivec_expand_stvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVX);
- DONE;
- }
-})
+; The next two patterns embody what lvx should usually look like.
+(define_insn "altivec_lvx_<mode>_2op"
+ [(set (match_operand:VM2 0 "register_operand" "=v")
+ (mem:VM2 (and:DI (plus:DI (match_operand:DI 1 "register_operand" "b")
+ (match_operand:DI 2 "register_operand" "r"))
+ (const_int -16))))]
+ "TARGET_ALTIVEC && TARGET_64BIT"
+ "lvx %0,%1,%2"
+ [(set_attr "type" "vecload")])
+(define_insn "altivec_lvx_<mode>_1op"
+ [(set (match_operand:VM2 0 "register_operand" "=v")
+ (mem:VM2 (and:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int -16))))]
+ "TARGET_ALTIVEC && TARGET_64BIT"
+ "lvx %0,0,%1"
+ [(set_attr "type" "vecload")])
+
+; 32-bit versions of the above.
+(define_insn "altivec_lvx_<mode>_2op_si"
+ [(set (match_operand:VM2 0 "register_operand" "=v")
+ (mem:VM2 (and:SI (plus:SI (match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "register_operand" "r"))
+ (const_int -16))))]
+ "TARGET_ALTIVEC && TARGET_32BIT"
+ "lvx %0,%1,%2"
+ [(set_attr "type" "vecload")])
+
+(define_insn "altivec_lvx_<mode>_1op_si"
+ [(set (match_operand:VM2 0 "register_operand" "=v")
+ (mem:VM2 (and:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int -16))))]
+ "TARGET_ALTIVEC && TARGET_32BIT"
+ "lvx %0,0,%1"
+ [(set_attr "type" "vecload")])
+
+; This version of stvx is used only in cases where we need to force an stvx
+; over any other store, and we don't care about losing CSE opportunities.
+; Its primary use is for epilogue register restores.
(define_insn "altivec_stvx_<mode>_internal"
[(parallel
[(set (match_operand:VM2 0 "memory_operand" "=Z")
@@ -2560,6 +2574,42 @@
"stvx %1,%y0"
[(set_attr "type" "vecstore")])
+; The next two patterns embody what stvx should usually look like.
+(define_insn "altivec_stvx_<mode>_2op"
+ [(set (mem:VM2 (and:DI (plus:DI (match_operand:DI 1 "register_operand" "b")
+ (match_operand:DI 2 "register_operand" "r"))
+ (const_int -16)))
+ (match_operand:VM2 0 "register_operand" "v"))]
+ "TARGET_ALTIVEC && TARGET_64BIT"
+ "stvx %0,%1,%2"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "altivec_stvx_<mode>_1op"
+ [(set (mem:VM2 (and:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int -16)))
+ (match_operand:VM2 0 "register_operand" "v"))]
+ "TARGET_ALTIVEC && TARGET_64BIT"
+ "stvx %0,0,%1"
+ [(set_attr "type" "vecstore")])
+
+; 32-bit versions of the above.
+(define_insn "altivec_stvx_<mode>_2op_si"
+ [(set (mem:VM2 (and:SI (plus:SI (match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "register_operand" "r"))
+ (const_int -16)))
+ (match_operand:VM2 0 "register_operand" "v"))]
+ "TARGET_ALTIVEC && TARGET_32BIT"
+ "stvx %0,%1,%2"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "altivec_stvx_<mode>_1op_si"
+ [(set (mem:VM2 (and:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int -16)))
+ (match_operand:VM2 0 "register_operand" "v"))]
+ "TARGET_ALTIVEC && TARGET_32BIT"
+ "stvx %0,0,%1"
+ [(set_attr "type" "vecstore")])
+
(define_expand "altivec_stvxl_<mode>"
[(parallel
[(set (match_operand:VM2 0 "memory_operand" "=Z")
===================================================================
@@ -4800,6 +4800,130 @@ assignment for unaligned loads and stores");
return stmt;
}
+ /* Expand vec_ld into an expression that masks the address and
+     performs the load.  We need to expand this early so that alias
+     analysis can see the access precisely; by the time we get into
+     RTL we are no longer able to honor __restrict__, for example.
+     We may want to consider this for all memory access built-ins.
+
+ When -maltivec=be is specified, simply punt to existing
+ built-in processing. */
+ if (fcode == ALTIVEC_BUILTIN_VEC_LD
+ && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG))
+ {
+ tree arg0 = (*arglist)[0];
+ tree arg1 = (*arglist)[1];
+
+ /* Strip qualifiers like "const" from the pointer arg. */
+ tree arg1_type = TREE_TYPE (arg1);
+ tree inner_type = TREE_TYPE (arg1_type);
+ if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
+ {
+ arg1_type = build_pointer_type (build_qualified_type (inner_type,
+ 0));
+ arg1 = fold_convert (arg1_type, arg1);
+ }
+
+ /* Construct the masked address. Let existing error handling take
+ over if we don't have a constant offset. */
+ arg0 = fold (arg0);
+
+ if (TREE_CODE (arg0) == INTEGER_CST)
+ {
+ if (!ptrofftype_p (TREE_TYPE (arg0)))
+ arg0 = build1 (NOP_EXPR, sizetype, arg0);
+
+ tree arg1_type = TREE_TYPE (arg1);
+ tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type,
+ arg1, arg0);
+ tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr,
+ build_int_cst (arg1_type, -16));
+
+ /* Find the built-in to get the return type so we can convert
+ the result properly (or fall back to default handling if the
+ arguments aren't compatible). */
+ for (desc = altivec_overloaded_builtins;
+ desc->code && desc->code != fcode; desc++)
+ continue;
+
+ for (; desc->code == fcode; desc++)
+ if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
+ && (rs6000_builtin_type_compatible (TREE_TYPE (arg1),
+ desc->op2)))
+ {
+ tree ret_type = rs6000_builtin_type (desc->ret_type);
+ if (TYPE_MODE (ret_type) == V2DImode)
+ /* Type-based aliasing analysis thinks vector long
+ and vector long long are different and will put them
+ in distinct alias classes. Force our return type
+ to be a may-alias type to avoid this. */
+ ret_type
+ = build_pointer_type_for_mode (ret_type, Pmode,
+ true/*can_alias_all*/);
+ else
+ ret_type = build_pointer_type (ret_type);
+ aligned = build1 (NOP_EXPR, ret_type, aligned);
+ tree ret_val = build_indirect_ref (loc, aligned, RO_NULL);
+ return ret_val;
+ }
+ }
+ }
+
+ /* Similarly for stvx. */
+ if (fcode == ALTIVEC_BUILTIN_VEC_ST
+ && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG))
+ {
+ tree arg0 = (*arglist)[0];
+ tree arg1 = (*arglist)[1];
+ tree arg2 = (*arglist)[2];
+
+ /* Construct the masked address. Let existing error handling take
+ over if we don't have a constant offset. */
+ arg1 = fold (arg1);
+
+ if (TREE_CODE (arg1) == INTEGER_CST)
+ {
+ if (!ptrofftype_p (TREE_TYPE (arg1)))
+ arg1 = build1 (NOP_EXPR, sizetype, arg1);
+
+ tree arg2_type = TREE_TYPE (arg2);
+ tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type,
+ arg2, arg1);
+ tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type, addr,
+ build_int_cst (arg2_type, -16));
+
+ /* Find the built-in to make sure a compatible one exists; if not
+ we fall back to default handling to get the error message. */
+ for (desc = altivec_overloaded_builtins;
+ desc->code && desc->code != fcode; desc++)
+ continue;
+
+ for (; desc->code == fcode; desc++)
+ if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
+ && rs6000_builtin_type_compatible (TREE_TYPE (arg1), desc->op2)
+ && rs6000_builtin_type_compatible (TREE_TYPE (arg2),
+ desc->op3))
+ {
+ tree arg0_type = TREE_TYPE (arg0);
+ if (TYPE_MODE (arg0_type) == V2DImode)
+ /* Type-based aliasing analysis thinks vector long
+ and vector long long are different and will put them
+ in distinct alias classes. Force our address type
+ to be a may-alias type to avoid this. */
+ arg0_type
+ = build_pointer_type_for_mode (arg0_type, Pmode,
+ true/*can_alias_all*/);
+ else
+ arg0_type = build_pointer_type (arg0_type);
+ aligned = build1 (NOP_EXPR, arg0_type, aligned);
+ tree stg = build_indirect_ref (loc, aligned, RO_NULL);
+ tree retval = build2 (MODIFY_EXPR, TREE_TYPE (stg), stg,
+ convert (TREE_TYPE (stg), arg0));
+ return retval;
+ }
+ }
+ }
+
for (n = 0;
!VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
fnargs = TREE_CHAIN (fnargs), n++)
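
At the source level, this early expansion gives vec_ld and vec_st the
semantics sketched below.  This is a hand-written equivalent for one
element type, not the literal trees built above; the point is that
lvx/stvx ignore the low four bits of the effective address:

    #include <altivec.h>
    #include <stdint.h>

    /* Equivalent of vec_ld (off, p): load the 16-byte-aligned block
       containing (char *) p + off.  */
    vector int
    vec_ld_equiv (long off, const vector int *p)
    {
      return *(vector int *) (((uintptr_t) p + off) & -16);
    }

    /* Equivalent of vec_st (v, off, p): store to the same masked
       address.  */
    void
    vec_st_equiv (vector int v, long off, vector int *p)
    {
      *(vector int *) (((uintptr_t) p + off) & -16) = v;
    }
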
===================================================================
@@ -13025,9 +13025,9 @@ swap_selector_for_mode (machine_mode mode)
return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
}
-/* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
- with -maltivec=be specified. Issue the load followed by an element-reversing
- permute. */
+/* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
+ with -maltivec=be specified. Issue the load followed by an element-
+ reversing permute. */
void
altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
{
@@ -13043,8 +13043,8 @@ altivec_expand_lvx_be (rtx op0, rtx op1, machine_m
emit_insn (gen_rtx_SET (op0, vperm));
}
-/* Generate code for a "stvx" or "stvxl" built-in for a little endian target
- with -maltivec=be specified. Issue the store preceded by an element-reversing
+/* Generate code for a "stvxl" built-in for a little endian target with
+ -maltivec=be specified. Issue the store preceded by an element-reversing
permute. */
void
altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
@@ -13106,22 +13106,65 @@ altivec_expand_lv_builtin (enum insn_code icode, t
op1 = copy_to_mode_reg (mode1, op1);
- if (op0 == const0_rtx)
+ /* For LVX, express the RTL accurately by ANDing the address with -16.
+ LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
+ so the raw address is fine. */
+ switch (icode)
{
- addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
- }
- else
- {
- op0 = copy_to_mode_reg (mode0, op0);
- addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
- }
+ case CODE_FOR_altivec_lvx_v2df_2op:
+ case CODE_FOR_altivec_lvx_v2di_2op:
+ case CODE_FOR_altivec_lvx_v4sf_2op:
+ case CODE_FOR_altivec_lvx_v4si_2op:
+ case CODE_FOR_altivec_lvx_v8hi_2op:
+ case CODE_FOR_altivec_lvx_v16qi_2op:
+ {
+ rtx rawaddr;
+ if (op0 == const0_rtx)
+ rawaddr = op1;
+ else
+ {
+ op0 = copy_to_mode_reg (mode0, op0);
+ rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
+ }
+ addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
+ addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
- pat = GEN_FCN (icode) (target, addr);
+ /* For -maltivec=be, emit the load and follow it up with a
+ permute to swap the elements. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ rtx temp = gen_reg_rtx (tmode);
+ emit_insn (gen_rtx_SET (temp, addr));
- if (! pat)
- return 0;
- emit_insn (pat);
+ rtx sel = swap_selector_for_mode (tmode);
+ rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
+ UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (target, vperm));
+ }
+ else
+ emit_insn (gen_rtx_SET (target, addr));
+ break;
+ }
+
+ default:
+ if (op0 == const0_rtx)
+ addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
+ else
+ {
+ op0 = copy_to_mode_reg (mode0, op0);
+ addr = gen_rtx_MEM (blk ? BLKmode : tmode,
+ gen_rtx_PLUS (Pmode, op1, op0));
+ }
+
+ pat = GEN_FCN (icode) (target, addr);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+ break;
+ }
+
return target;
}
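
The address construction in the new case can be distilled into a small
helper; a sketch under the same assumptions as the code above (64-bit
Pmode; the helper name is hypothetical, not part of the patch):

    static rtx
    altivec_aligned_mem_sketch (machine_mode vmode, rtx base, rtx off)
    {
      /* Use (plus base off) when there is a nonzero offset.  */
      rtx raw = (off == const0_rtx) ? base : gen_rtx_PLUS (Pmode, base, off);
      /* Clear the low 4 bits, exactly as lvx/stvx do in hardware.  */
      rtx aligned = gen_rtx_AND (Pmode, raw, gen_rtx_CONST_INT (Pmode, -16));
      return gen_rtx_MEM (vmode, aligned);
    }
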
@@ -13208,7 +13251,7 @@ altivec_expand_stv_builtin (enum insn_code icode,
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
- rtx pat, addr;
+ rtx pat, addr, rawaddr;
machine_mode tmode = insn_data[icode].operand[0].mode;
machine_mode smode = insn_data[icode].operand[1].mode;
machine_mode mode1 = Pmode;
@@ -13220,24 +13263,69 @@ altivec_expand_stv_builtin (enum insn_code icode,
|| arg2 == error_mark_node)
return const0_rtx;
- if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
- op0 = copy_to_mode_reg (smode, op0);
-
op2 = copy_to_mode_reg (mode2, op2);
- if (op1 == const0_rtx)
+ /* For STVX, express the RTL accurately by ANDing the address with -16.
+ STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
+ so the raw address is fine. */
+ switch (icode)
{
- addr = gen_rtx_MEM (tmode, op2);
- }
- else
- {
- op1 = copy_to_mode_reg (mode1, op1);
- addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
- }
+ case CODE_FOR_altivec_stvx_v2df_2op:
+ case CODE_FOR_altivec_stvx_v2di_2op:
+ case CODE_FOR_altivec_stvx_v4sf_2op:
+ case CODE_FOR_altivec_stvx_v4si_2op:
+ case CODE_FOR_altivec_stvx_v8hi_2op:
+ case CODE_FOR_altivec_stvx_v16qi_2op:
+ {
+ if (op1 == const0_rtx)
+ rawaddr = op2;
+ else
+ {
+ op1 = copy_to_mode_reg (mode1, op1);
+ rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
+ }
- pat = GEN_FCN (icode) (addr, op0);
- if (pat)
- emit_insn (pat);
+ addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
+ addr = gen_rtx_MEM (tmode, addr);
+
+ op0 = copy_to_mode_reg (tmode, op0);
+
+ /* For -maltivec=be, emit a permute to swap the elements, followed
+ by the store. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ rtx temp = gen_reg_rtx (tmode);
+ rtx sel = swap_selector_for_mode (tmode);
+ rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
+ UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (temp, vperm));
+ emit_insn (gen_rtx_SET (addr, temp));
+ }
+ else
+ emit_insn (gen_rtx_SET (addr, op0));
+
+ break;
+ }
+
+ default:
+ {
+ if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
+ op0 = copy_to_mode_reg (smode, op0);
+
+ if (op1 == const0_rtx)
+ addr = gen_rtx_MEM (tmode, op2);
+ else
+ {
+ op1 = copy_to_mode_reg (mode1, op1);
+ addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
+ }
+
+ pat = GEN_FCN (icode) (addr, op0);
+ if (pat)
+ emit_insn (pat);
+ }
+ }
+
return NULL_RTX;
}
@@ -14073,18 +14161,18 @@ altivec_expand_builtin (tree exp, rtx target, bool
switch (fcode)
{
case ALTIVEC_BUILTIN_STVX_V2DF:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
case ALTIVEC_BUILTIN_STVX_V2DI:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
case ALTIVEC_BUILTIN_STVX_V4SF:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
case ALTIVEC_BUILTIN_STVX:
case ALTIVEC_BUILTIN_STVX_V4SI:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
case ALTIVEC_BUILTIN_STVX_V8HI:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
case ALTIVEC_BUILTIN_STVX_V16QI:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
case ALTIVEC_BUILTIN_STVEBX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
case ALTIVEC_BUILTIN_STVEHX:
@@ -14272,23 +14360,23 @@ altivec_expand_builtin (tree exp, rtx target, bool
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
exp, target, false);
case ALTIVEC_BUILTIN_LVX_V2DF:
- return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
exp, target, false);
case ALTIVEC_BUILTIN_LVX_V2DI:
- return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
exp, target, false);
case ALTIVEC_BUILTIN_LVX_V4SF:
- return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
exp, target, false);
case ALTIVEC_BUILTIN_LVX:
case ALTIVEC_BUILTIN_LVX_V4SI:
- return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
exp, target, false);
case ALTIVEC_BUILTIN_LVX_V8HI:
- return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
exp, target, false);
case ALTIVEC_BUILTIN_LVX_V16QI:
- return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
exp, target, false);
case ALTIVEC_BUILTIN_LVLX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
@@ -37139,7 +37227,9 @@ insn_is_swappable_p (swap_web_entry *insn_entry, r
fix them up by converting them to permuting ones. Exceptions:
UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
- for the SET source. */
+ for the SET source. Also we must now make an exception for lvx
+	 and stvx when they are not in the UNSPEC_LVX/STVX form, i.e.,
+	 when they use the explicit "& -16" address, since converting
+	 these leads to unrecognizable insns.  */
rtx body = PATTERN (insn);
int i = INSN_UID (insn);
@@ -37147,6 +37237,11 @@ insn_is_swappable_p (swap_web_entry *insn_entry, r
{
if (GET_CODE (body) == SET)
{
+ rtx rhs = SET_SRC (body);
+ gcc_assert (GET_CODE (rhs) == MEM);
+ if (GET_CODE (XEXP (rhs, 0)) == AND)
+ return 0;
+
*special = SH_NOSWAP_LD;
return 1;
}
@@ -37156,8 +37251,14 @@ insn_is_swappable_p (swap_web_entry *insn_entry, r
if (insn_entry[i].is_store)
{
- if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
+ if (GET_CODE (body) == SET
+ && GET_CODE (SET_SRC (body)) != UNSPEC)
{
+ rtx lhs = SET_DEST (body);
+ gcc_assert (GET_CODE (lhs) == MEM);
+ if (GET_CODE (XEXP (lhs, 0)) == AND)
+ return 0;
+
*special = SH_NOSWAP_ST;
return 1;
}
@@ -37827,6 +37928,267 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
fputs ("\n", dump_file);
}
+/* Given ALIGN, an rtx of the form (and ADDR (const_int -16)), return
+   a copy with ADDR canonicalized to (reg) or (plus (reg) (reg)).
+   Always return a new rtx to avoid problems with combine.  */
+static rtx
+alignment_with_canonical_addr (rtx align)
+{
+ rtx canon;
+ rtx addr = XEXP (align, 0);
+
+ if (REG_P (addr))
+ canon = addr;
+
+ else if (GET_CODE (addr) == PLUS)
+ {
+ rtx addrop0 = XEXP (addr, 0);
+ rtx addrop1 = XEXP (addr, 1);
+
+ if (!REG_P (addrop0))
+ addrop0 = force_reg (GET_MODE (addrop0), addrop0);
+
+ if (!REG_P (addrop1))
+ addrop1 = force_reg (GET_MODE (addrop1), addrop1);
+
+ canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
+ }
+
+ else
+ canon = force_reg (GET_MODE (addr), addr);
+
+ return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
+}
+
+/* Check whether INSN sets a register to an AND of an address with
+   -16 (an alignment mask).  If so, return a fully-expanded rtx for
+   the masking operation, with the address canonicalized; else 0.  */
+static rtx
+alignment_mask (rtx_insn *insn)
+{
+ rtx body = PATTERN (insn);
+
+ if (GET_CODE (body) != SET
+ || GET_CODE (SET_SRC (body)) != AND
+ || !REG_P (XEXP (SET_SRC (body), 0)))
+ return 0;
+
+ rtx mask = XEXP (SET_SRC (body), 1);
+
+ if (GET_CODE (mask) == CONST_INT)
+ {
+ if (INTVAL (mask) == -16)
+ return alignment_with_canonical_addr (SET_SRC (body));
+ else
+ return 0;
+ }
+
+ if (!REG_P (mask))
+ return 0;
+
+ struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+ df_ref use;
+ rtx real_mask = 0;
+
+ FOR_EACH_INSN_INFO_USE (use, insn_info)
+ {
+ if (!rtx_equal_p (DF_REF_REG (use), mask))
+ continue;
+
+ struct df_link *def_link = DF_REF_CHAIN (use);
+ if (!def_link || def_link->next)
+ return 0;
+
+ rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
+ rtx const_body = PATTERN (const_insn);
+ if (GET_CODE (const_body) != SET)
+ return 0;
+
+ real_mask = SET_SRC (const_body);
+
+ if (GET_CODE (real_mask) != CONST_INT
+ || INTVAL (real_mask) != -16)
+ return 0;
+ }
+
+ if (real_mask == 0)
+ return 0;
+
+ return alignment_with_canonical_addr (SET_SRC (body));
+}
+
+/* Given INSN, a load or store whose address is based at BASE_REG,
+   look for a feeding computation that aligns its address on a
+   16-byte boundary, and return its canonicalized mask rtx or 0.  */
+static rtx
+find_alignment_op (rtx_insn *insn, rtx base_reg)
+{
+ df_ref base_use;
+ struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+ rtx and_operation = 0;
+
+ FOR_EACH_INSN_INFO_USE (base_use, insn_info)
+ {
+ if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
+ continue;
+
+ struct df_link *base_def_link = DF_REF_CHAIN (base_use);
+ if (!base_def_link || base_def_link->next)
+ break;
+
+ rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
+ and_operation = alignment_mask (and_insn);
+ if (and_operation != 0)
+ break;
+ }
+
+ return and_operation;
+}
+
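+/* Record, for each insn UID, a swap insn that should be replaced by
+   a copy once the insn walk completes.  */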
+struct del_info { bool replace; rtx_insn *replace_insn; };
+
+/* If INSN is the load for an lvx pattern, put it in canonical form. */
+static void
+combine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
+{
+ rtx body = PATTERN (insn);
+ gcc_assert (GET_CODE (body) == SET
+ && GET_CODE (SET_SRC (body)) == VEC_SELECT
+ && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
+
+ rtx mem = XEXP (SET_SRC (body), 0);
+ rtx base_reg = XEXP (mem, 0);
+
+ rtx and_operation = find_alignment_op (insn, base_reg);
+
+ if (and_operation != 0)
+ {
+ df_ref def;
+ struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+ FOR_EACH_INSN_INFO_DEF (def, insn_info)
+ {
+ struct df_link *link = DF_REF_CHAIN (def);
+ if (!link || link->next)
+ break;
+
+ rtx_insn *swap_insn = DF_REF_INSN (link->ref);
+ if (!insn_is_swap_p (swap_insn)
+ || insn_is_load_p (swap_insn)
+ || insn_is_store_p (swap_insn))
+ break;
+
+ /* Expected lvx pattern found. Change the swap to
+ a copy, and propagate the AND operation into the
+ load. */
+ to_delete[INSN_UID (swap_insn)].replace = true;
+ to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
+
+ XEXP (mem, 0) = and_operation;
+ SET_SRC (body) = mem;
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "lvx opportunity found at %d\n",
+ INSN_UID (insn));
+ }
+ }
+}
+
+/* If INSN is the store for an stvx pattern, put it in canonical form. */
+static void
+combine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
+{
+ rtx body = PATTERN (insn);
+ gcc_assert (GET_CODE (body) == SET
+ && GET_CODE (SET_DEST (body)) == MEM
+ && GET_CODE (SET_SRC (body)) == VEC_SELECT);
+ rtx mem = SET_DEST (body);
+ rtx base_reg = XEXP (mem, 0);
+
+ rtx and_operation = find_alignment_op (insn, base_reg);
+
+ if (and_operation != 0)
+ {
+ rtx src_reg = XEXP (SET_SRC (body), 0);
+ df_ref src_use;
+ struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+ FOR_EACH_INSN_INFO_USE (src_use, insn_info)
+ {
+ if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
+ continue;
+
+ struct df_link *link = DF_REF_CHAIN (src_use);
+ if (!link || link->next)
+ break;
+
+ rtx_insn *swap_insn = DF_REF_INSN (link->ref);
+ if (!insn_is_swap_p (swap_insn)
+ || insn_is_load_p (swap_insn)
+ || insn_is_store_p (swap_insn))
+ break;
+
+ /* Expected stvx pattern found. Change the swap to
+ a copy, and propagate the AND operation into the
+ store. */
+ to_delete[INSN_UID (swap_insn)].replace = true;
+ to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
+
+ XEXP (mem, 0) = and_operation;
+ SET_SRC (body) = src_reg;
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "stvx opportunity found at %d\n",
+ INSN_UID (insn));
+ }
+ }
+}
+
+/* Look for patterns created from builtin lvx and stvx calls, and
+ canonicalize them to be properly recognized as such. */
+static void
+combine_lvx_stvx_patterns (function *fun)
+{
+ int i;
+ basic_block bb;
+ rtx_insn *insn;
+
+ int num_insns = get_max_uid ();
+ del_info *to_delete = XCNEWVEC (del_info, num_insns);
+
+ FOR_ALL_BB_FN (bb, fun)
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ if (insn_is_load_p (insn) && insn_is_swap_p (insn))
+ combine_lvx_pattern (insn, to_delete);
+ else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
+ combine_stvx_pattern (insn, to_delete);
+ }
+
+ /* Turning swaps into copies is delayed until now, to avoid problems
+ with deleting instructions during the insn walk. */
+ for (i = 0; i < num_insns; i++)
+ if (to_delete[i].replace)
+ {
+ rtx swap_body = PATTERN (to_delete[i].replace_insn);
+ rtx src_reg = XEXP (SET_SRC (swap_body), 0);
+ rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
+ rtx_insn *new_insn = emit_insn_before (copy,
+ to_delete[i].replace_insn);
+ set_block_for_insn (new_insn,
+ BLOCK_FOR_INSN (to_delete[i].replace_insn));
+ df_insn_rescan (new_insn);
+ df_insn_delete (to_delete[i].replace_insn);
+ remove_insn (to_delete[i].replace_insn);
+ to_delete[i].replace_insn->set_deleted ();
+ }
+
+ free (to_delete);
+}
+
/* Main entry point for this pass. */
unsigned int
rs6000_analyze_swaps (function *fun)
@@ -37833,7 +38195,7 @@ rs6000_analyze_swaps (function *fun)
{
swap_web_entry *insn_entry;
basic_block bb;
- rtx_insn *insn;
+ rtx_insn *insn, *curr_insn = 0;
/* Dataflow analysis for use-def chains. */
df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
@@ -37841,12 +38203,15 @@ rs6000_analyze_swaps (function *fun)
df_analyze ();
df_set_flags (DF_DEFER_INSN_RESCAN);
+ /* Pre-pass to combine lvx and stvx patterns so we don't lose info. */
+ combine_lvx_stvx_patterns (fun);
+
/* Allocate structure to represent webs of insns. */
insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
/* Walk the insns to gather basic data. */
FOR_ALL_BB_FN (bb, fun)
- FOR_BB_INSNS (bb, insn)
+ FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
{
unsigned int uid = INSN_UID (insn);
if (NONDEBUG_INSN_P (insn))
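
find_alignment_op and the register case of alignment_mask both lean on
the same dataflow idiom: accept a value only when it has exactly one
reaching definition.  A distilled sketch (hypothetical helper; assumes
use-def chains have been computed, as rs6000_analyze_swaps arranges
before the pre-pass runs):

    static rtx_insn *
    unique_reaching_def_sketch (rtx_insn *insn, rtx reg)
    {
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, DF_INSN_INFO_GET (insn))
        if (rtx_equal_p (DF_REF_REG (use), reg))
          {
            struct df_link *def_link = DF_REF_CHAIN (use);
            /* Exactly one reaching definition, or we give up.  */
            if (def_link && !def_link->next)
              return DF_REF_INSN (def_link->ref);
            return NULL;
          }
      return NULL;
    }
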
===================================================================
@@ -167,7 +167,14 @@
if (VECTOR_MEM_VSX_P (<MODE>mode))
{
operands[1] = rs6000_address_for_altivec (operands[1]);
- emit_insn (gen_altivec_lvx_<mode> (operands[0], operands[1]));
+ rtx and_op = XEXP (operands[1], 0);
+ gcc_assert (GET_CODE (and_op) == AND);
+ rtx addr = XEXP (and_op, 0);
+ if (GET_CODE (addr) == PLUS)
+ emit_insn (gen_altivec_lvx_<mode>_2op (operands[0], XEXP (addr, 0),
+ XEXP (addr, 1)));
+ else
+        emit_insn (gen_altivec_lvx_<mode>_1op (operands[0], addr));
DONE;
}
}")
@@ -183,7 +190,14 @@
if (VECTOR_MEM_VSX_P (<MODE>mode))
{
operands[0] = rs6000_address_for_altivec (operands[0]);
- emit_insn (gen_altivec_stvx_<mode> (operands[0], operands[1]));
+ rtx and_op = XEXP (operands[0], 0);
+ gcc_assert (GET_CODE (and_op) == AND);
+ rtx addr = XEXP (and_op, 0);
+ if (GET_CODE (addr) == PLUS)
+ emit_insn (gen_altivec_stvx_<mode>_2op (operands[1], XEXP (addr, 0),
+ XEXP (addr, 1)));
+ else
+        emit_insn (gen_altivec_stvx_<mode>_1op (operands[1], addr));
DONE;
}
}")