@@ -3393,6 +3393,170 @@ expand_DEFERRED_INIT (internal_fn, gcall *stmt)
}
}
+/* In the parallel rtx register series REGS, compute which register slots
+   cover the bit range {BITPOS, BITSIZE}: the slot indices are stored into
+   START_INDEX/END_INDEX, and the bit offsets from the low end of the first
+   slot and to the high end of the last slot into LEFT_BITS/RIGHT_BITS.  */
+
+void
+query_position_in_parallel (HOST_WIDE_INT bitpos, HOST_WIDE_INT bitsize,
+ rtx regs, int &start_index, int &end_index,
+ HOST_WIDE_INT &left_bits, HOST_WIDE_INT &right_bits)
+{
+ int cur_index = XEXP (XVECEXP (regs, 0, 0), 0) ? 0 : 1; /* Skip null slot.  */
+ for (; cur_index < XVECLEN (regs, 0); cur_index++)
+ {
+ rtx slot = XVECEXP (regs, 0, cur_index);
+ HOST_WIDE_INT off = UINTVAL (XEXP (slot, 1)) * BITS_PER_UNIT;
+ machine_mode mode = GET_MODE (XEXP (slot, 0));
+ HOST_WIDE_INT size = GET_MODE_BITSIZE (mode).to_constant ();
+ if (off <= bitpos && off + size > bitpos)
+ {
+ start_index = cur_index;
+ left_bits = bitpos - off;
+ }
+ if (off + size >= bitpos + bitsize)
+ {
+ end_index = cur_index;
+ right_bits = off + size - (bitpos + bitsize);
+ break;
+ }
+ }
+}
+
+/* Create a PARALLEL of consecutive word-mode registers starting at FIRST_REG;
+   SIZE is the total size in bytes covered by those registers.  */
+static rtx
+construct_reg_seq (HOST_WIDE_INT size, rtx first_reg)
+{
+ int nregs = size / UNITS_PER_WORD + (((size % UNITS_PER_WORD) != 0) ? 1 : 0);
+ rtx *tmps = XALLOCAVEC (rtx, nregs);
+ int regno = REGNO (first_reg);
+ machine_mode mode = word_mode;
+ HOST_WIDE_INT word_size = GET_MODE_SIZE (mode).to_constant ();
+ for (int i = 0; i < nregs; i++)
+ {
+ rtx reg = gen_rtx_REG (mode, regno + i);
+ rtx off = GEN_INT (word_size * i);
+ tmps[i] = gen_rtx_EXPR_LIST (VOIDmode, reg, off);
+ }
+ return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (nregs, tmps));
+}
+
+/* Extract the bit range {BITPOS, BITSIZE} of incoming argument ARG directly
+   from the registers of its DECL_INCOMING_RTL, for the read access EXPR.
+   Return the extracted rtx, or NULL_RTX if it cannot be done.  */
+static rtx
+get_incoming_element (tree arg, HOST_WIDE_INT bitpos, HOST_WIDE_INT bitsize,
+ bool reversep, tree expr)
+{
+ rtx regs = DECL_INCOMING_RTL (arg);
+ bool has_padding = false;
+ if (REG_P (regs) && GET_MODE (regs) == BLKmode)
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (arg));
+ has_padding = (size % UNITS_PER_WORD) != 0;
+ regs = construct_reg_seq (size, regs);
+ }
+
+ if (GET_CODE (regs) != PARALLEL)
+ return NULL_RTX;
+
+ int start_index = -1, end_index = -1;
+ HOST_WIDE_INT left_bits = 0, right_bits = 0;
+ query_position_in_parallel (bitpos, bitsize, regs, start_index, end_index,
+ left_bits, right_bits);
+
+ if (start_index < 0 || end_index < 0)
+ return NULL_RTX;
+
+ machine_mode expr_mode = TYPE_MODE (TREE_TYPE (expr));
+ /* Only accesses contained in a single register are supported.  */
+ if (end_index != start_index)
+ return NULL_RTX;
+
+ rtx reg = XEXP (XVECEXP (regs, 0, start_index), 0);
+ if (left_bits == 0 && right_bits == 0)
+ {
+ if (GET_MODE (reg) != expr_mode)
+ reg = gen_lowpart (expr_mode, reg);
+ return reg;
+ }
+
+ /* Need a bit-field extraction: left_bits != 0 || right_bits != 0.  */
+ if (has_padding && end_index == XVECLEN (regs, 0) - 1)
+ return NULL_RTX;
+ scalar_int_mode imode;
+ if (!int_mode_for_mode (expr_mode).exists (&imode))
+ return NULL_RTX;
+
+ /* NOTE(review): GET_MODE (regs) is BLKmode here; was GET_MODE (reg) meant?  */
+ if (expr_mode != imode
+ && known_gt (GET_MODE_SIZE (GET_MODE (regs)), UNITS_PER_WORD))
+ return NULL_RTX;
+
+ machine_mode mode = GET_MODE (reg);
+ bool sgn = TYPE_UNSIGNED (TREE_TYPE (expr));
+ rtx bfld = extract_bit_field (reg, bitsize, left_bits, sgn, NULL_RTX, mode,
+ imode, reversep, NULL);
+
+ if (GET_MODE (bfld) != imode)
+ bfld = gen_lowpart (imode, bfld);
+
+ if (expr_mode == imode)
+ return bfld;
+
+ /* expr_mode != imode, e.g. SF != SI.  */
+ rtx result = gen_reg_rtx (imode);
+ emit_move_insn (result, bfld);
+ return gen_lowpart (expr_mode, result);
+}
+
+extern tree reference_alias_ptr_type (tree t);
+
+/* Expand internal call STMT to IFN_ARG_PARTS: assign to the call's LHS the
+   part of aggregate argument ARG described by bit OFFSET and SIZE.  */
+static void
+expand_ARG_PARTS (internal_fn, gcall *stmt)
+{
+ tree lhs = gimple_call_lhs (stmt);
+ tree arg = gimple_call_arg (stmt, 0);
+ HOST_WIDE_INT offset = tree_to_shwi (gimple_call_arg (stmt, 1)); /* Bits.  */
+ HOST_WIDE_INT size = tree_to_shwi (gimple_call_arg (stmt, 2)); /* Bits.  */
+ int reversep = tree_to_shwi (gimple_call_arg (stmt, 3));
+ rtx sub_elem = get_incoming_element (arg, offset, size, reversep, lhs);
+ if (sub_elem)
+ {
+ rtx to_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ if (to_rtx)
+ {
+ gcc_assert (REG_P (to_rtx));
+ emit_move_insn (to_rtx, sub_elem);
+ return;
+ }
+ }
+ /* Fall back to normal expand method.  */
+ if ((offset % BITS_PER_WORD == 0) && (size % BITS_PER_WORD == 0))
+ {
+ tree base = build_fold_addr_expr (arg);
+ tree type = reference_alias_ptr_type (arg);
+ tree off = build_int_cst (type, offset / BITS_PER_UNIT);
+ location_t loc = EXPR_LOCATION (arg);
+ tree rhs = fold_build2_loc (loc, MEM_REF, TREE_TYPE (lhs), base, off);
+ REF_REVERSE_STORAGE_ORDER (rhs) = reversep;
+ expand_assignment (lhs, rhs, false);
+ }
+ else
+ {
+ tree type = TREE_TYPE (lhs);
+ machine_mode mode = TYPE_MODE (type);
+ rtx op0
+ = expand_expr_real (arg, NULL, VOIDmode, EXPAND_NORMAL, NULL, true);
+ op0 = extract_bit_field (op0, size, offset, TYPE_UNSIGNED (type), NULL,
+ mode, mode, reversep, NULL);
+ rtx dest = expand_expr (lhs, NULL, VOIDmode, EXPAND_WRITE);
+ emit_move_insn (dest, op0);
+ }
+}
+
/* The size of an OpenACC compute dimension. */
static void
@@ -510,6 +510,9 @@ DEF_INTERNAL_FN (PHI, 0, NULL)
automatic variable. */
DEF_INTERNAL_FN (DEFERRED_INIT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
+/* A function to extract element(s) from an aggregate argument in fsra.  */
+DEF_INTERNAL_FN (ARG_PARTS, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
+
/* DIM_SIZE and DIM_POS return the size of a particular compute
dimension and the executing thread's position within that
dimension. DIM_POS is pure (and not const) so that it isn't
@@ -1508,7 +1508,8 @@ scan_function (void)
tree t;
unsigned i;
- if (gimple_code (stmt) != GIMPLE_CALL)
+ if (gimple_code (stmt) != GIMPLE_CALL
+ || sra_mode == SRA_MODE_FINAL_INTRA)
walk_stmt_load_store_addr_ops (stmt, NULL, NULL, NULL,
scan_visit_addr);
@@ -2767,12 +2768,22 @@ analyze_access_subtree (struct access *root, struct access *parent,
hole = true;
}
+ auto check_rw = [] (struct access *root) -> bool {
+ if ((root->grp_scalar_read || root->grp_assignment_read)
+ && (root->grp_scalar_write || root->grp_assignment_write))
+ return true;
+ if (sra_mode != SRA_MODE_FINAL_INTRA)
+ return false;
+ if ((root->grp_scalar_read || root->grp_assignment_read)
+ && TREE_CODE (root->base) == PARM_DECL)
+ return true;
+ return false;
+ };
+
+ /* In fsra, a parameter is scalarizable even if it is never written.  */
if (allow_replacements && scalar && !root->first_child
&& (totally || !root->grp_total_scalarization)
- && (totally
- || root->grp_hint
- || ((root->grp_scalar_read || root->grp_assignment_read)
- && (root->grp_scalar_write || root->grp_assignment_write))))
+ && (totally || root->grp_hint || check_rw (root)))
{
/* Always create access replacements that cover the whole access.
For integral types this means the precision has to match.
@@ -2841,6 +2852,11 @@ analyze_access_subtree (struct access *root, struct access *parent,
root->grp_covered = 1;
else if (root->grp_write || comes_initialized_p (root->base))
root->grp_unscalarized_data = 1; /* not covered and written to */
+
+ if (sra_mode == SRA_MODE_FINAL_INTRA && root->grp_write
+ && TREE_CODE (root->base) == PARM_DECL)
+ return false;
+
return sth_created;
}
@@ -3802,7 +3818,7 @@ generate_subtree_copies (struct access *access, tree agg,
|| access->offset + access->size > start_offset))
{
tree expr, repl = get_access_replacement (access);
- gassign *stmt;
+ gimple *stmt;
expr = build_ref_for_model (loc, agg, access->offset - top_offset,
access, gsi, insert_after);
@@ -3814,7 +3830,20 @@ generate_subtree_copies (struct access *access, tree agg,
!insert_after,
insert_after ? GSI_NEW_STMT
: GSI_SAME_STMT);
- stmt = gimple_build_assign (repl, expr);
+ if (sra_mode == SRA_MODE_FINAL_INTRA
+ && TREE_CODE (access->base) == PARM_DECL
+ && (access->grp_scalar_read || access->grp_assignment_read))
+ {
+ gimple *call = gimple_build_call_internal (
+ IFN_ARG_PARTS, 4, access->base,
+ wide_int_to_tree (sizetype, access->offset),
+ wide_int_to_tree (sizetype, access->size),
+ wide_int_to_tree (sizetype, access->reverse));
+ gimple_call_set_lhs (call, repl);
+ stmt = call;
+ }
+ else
+ stmt = gimple_build_assign (repl, expr);
}
else
{
@@ -5,7 +5,7 @@
// Test that a zero-width bit field in an otherwise homogeneous aggregate
// generates a psabi warning and passes arguments in GPRs.
-// { dg-final { scan-assembler-times {\mstd\M} 4 } }
+// { dg-final { scan-assembler-times {\mmtvsrd\M} 4 } }
struct a_thing
{
new file mode 100644
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-O2 -save-temps" } */
+
+/* Check that scalar members of aggregates passed in registers are read
+   directly from the incoming registers (no stack reloads: no lwz/lhz).  */
+typedef struct DF
+{
+ double a[4];
+ short s1;
+ short s2;
+ short s3;
+ short s4;
+} DF;
+typedef struct SF
+{
+ float a[4];
+ int i1;
+ int i2;
+} SF;
+
+/* { dg-final { scan-assembler-times {\mmtvsrd|mtvsrws\M} 3 {target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mlwz\M} {target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mlhz\M} {target { lp64 && has_arch_pwr8 } } } } */
+
+#define NOIPA __attribute__ ((noipa))
+
+short NOIPA
+foo_hi (DF a, int flag)
+{
+ if (flag == 2)
+ return a.s2 + a.s3;
+ return 0;
+}
+int NOIPA
+foo_si (SF a, int flag)
+{
+ if (flag == 2)
+ return a.i2 + a.i1;
+ return 0;
+}
+double NOIPA
+foo_df (DF arg, int flag)
+{
+ if (flag == 2)
+ return arg.a[3];
+ else
+ return 0.0;
+}
+float NOIPA
+foo_sf (SF arg, int flag)
+{
+ if (flag == 2)
+ return arg.a[2];
+ return 0;
+}
+float NOIPA
+foo_sf1 (SF arg, int flag)
+{
+ if (flag == 2)
+ return arg.a[1];
+ return 0;
+}
+
+DF gdf = {{1.0, 2.0, 3.0, 4.0}, 1, 2, 3, 4};
+SF gsf = {{1.0f, 2.0f, 3.0f, 4.0f}, 1, 2};
+
+int
+main ()
+{
+ if (!(foo_hi (gdf, 2) == 5 && foo_si (gsf, 2) == 3 && foo_df (gdf, 2) == 4.0
+ && foo_sf (gsf, 2) == 3.0 && foo_sf1 (gsf, 2) == 2.0))
+ __builtin_abort ();
+ if (!(foo_hi (gdf, 1) == 0 && foo_si (gsf, 1) == 0 && foo_df (gdf, 1) == 0
+ && foo_sf (gsf, 1) == 0 && foo_sf1 (gsf, 1) == 0))
+ __builtin_abort ();
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,76 @@
+/* { dg-do run } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-O2 -save-temps" } */
+
+/* Check that float members of aggregates passed in registers are accessed
+   through direct register moves (mtvsrd/mtvsrws, xscvspdpn).  */
+/* { dg-final { scan-assembler-times {\mmtvsrd|mtvsrws\M} 5 {target { lp64 && { has_arch_pwr8 && be } } } } } */
+/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 4 {target { lp64 && { has_arch_pwr8 && be } } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrd|mtvsrws\M} 3 {target { lp64 && { has_arch_pwr8 && le } } } } } */
+/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 {target { lp64 && { has_arch_pwr8 && le } } } } } */
+/* { dg-final { scan-assembler-times {\mfadds\M} 2 {target { lp64 && has_arch_pwr8 } } } } */
+
+#define NOIPA __attribute__ ((noipa))
+typedef struct X
+{
+ float x;
+ float y;
+} X;
+
+float NOIPA
+fooX (X y)
+{
+ y.x += 1;
+ return y.x + y.y;
+}
+
+typedef struct Y
+{
+ double a[4];
+ long l;
+} Y;
+
+double NOIPA
+fooY (Y arg)
+{
+ return arg.a[3];
+}
+
+typedef struct Z
+{
+ float a[4];
+ short l;
+} Z;
+
+float NOIPA
+fooZ (Z arg)
+{
+ return arg.a[3];
+}
+
+float NOIPA
+fooZ2 (Z arg)
+{
+ return arg.a[2];
+}
+
+X x = {1.0f, 2.0f};
+Y y = {1.0, 2.0, 3.0, 4.0, 1};
+Z z = {1.0f, 2.0f, 3.0f, 4.0f, 1};
+int
+main ()
+{
+ if (fooX (x) != 4.0f)
+ __builtin_abort ();
+
+ if (fooY (y) != 4.0)
+ __builtin_abort ();
+
+ if (fooZ (z) != 4.0f)
+ __builtin_abort ();
+
+ if (fooZ2 (z) != 3.0f)
+ __builtin_abort ();
+
+ return 0;
+}