Message ID: 20231106065205.290215-1-juzhe.zhong@rivai.ai
State: New
Series: [V2] VECT: Support mask_len_strided_load/mask_len_strided_store in loop vectorize
Sorry.
This is a middle-end patch; I sent it to the wrong CC lists.
Please disregard this patch.
juzhe.zhong@rivai.ai
From: Juzhe-Zhong
Date: 2023-11-06 14:52
To: gcc-patches
CC: kito.cheng; kito.cheng; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
Subject: [PATCH V2] VECT: Support mask_len_strided_load/mask_len_strided_store in loop vectorize
This patch adds strided load/store support to the loop vectorizer, keyed off STMT_VINFO_STRIDED_P.
Bootstrap and regression testing passed on x86.
OK for trunk?
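For context, here is a hedged sketch (not from the patch) of the kind of
loop this enables: an access whose step is a loop-invariant stride, which
is what STMT_VINFO_STRIDED_P marks.

/* Hypothetical example: both accesses step by a runtime stride, so on
   targets providing the new optabs the vectorizer can emit
   MASK_LEN_STRIDED_LOAD/MASK_LEN_STRIDED_STORE with the scalar stride
   (DR_STEP) instead of a gather/scatter with a vector offset.  */
void
foo (int *restrict dst, int *restrict src, long stride, int n)
{
  for (int i = 0; i < n; i++)
    dst[i * stride] = src[i * stride] + 1;
}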
gcc/ChangeLog:
* internal-fn.cc (strided_load_direct): New function.
(strided_store_direct): Ditto.
(expand_strided_store_optab_fn): Ditto.
(expand_scatter_store_optab_fn): Add strided store.
(expand_strided_load_optab_fn): New function.
(expand_gather_load_optab_fn): Add strided load.
(direct_strided_load_optab_supported_p): New function.
(direct_strided_store_optab_supported_p): Ditto.
(internal_load_fn_p): Add strided load.
(internal_strided_fn_p): New function.
(internal_fn_len_index): Add strided load/store.
(internal_fn_mask_index): Ditto.
(internal_fn_stored_value_index): Add strided store.
(internal_strided_fn_supported_p): New function.
* internal-fn.def (MASK_LEN_STRIDED_LOAD): New IFN.
(MASK_LEN_STRIDED_STORE): Ditto.
* internal-fn.h (internal_strided_fn_p): New function.
(internal_strided_fn_supported_p): Ditto.
* optabs-query.cc (supports_vec_gather_load_p): Add strided load.
(supports_vec_scatter_store_p): Add strided store.
* optabs-query.h (supports_vec_gather_load_p): Add strided load.
(supports_vec_scatter_store_p): Add strided store.
* tree-vect-data-refs.cc (vect_prune_runtime_alias_test_list): Add strided load/store.
(vect_gather_scatter_fn_p): Ditto.
(vect_check_gather_scatter): Ditto.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
(vect_truncate_gather_scatter_offset): Ditto.
(vect_use_strided_gather_scatters_p): Ditto.
(vect_get_strided_load_store_ops): Ditto.
(vectorizable_store): Ditto.
(vectorizable_load): Ditto.
* tree-vectorizer.h (vect_gather_scatter_fn_p): Ditto.
---
gcc/internal-fn.cc | 101 ++++++++++++++++++++++++++++++++-----
gcc/internal-fn.def | 4 ++
gcc/internal-fn.h | 2 +
gcc/optabs-query.cc | 25 ++++++---
gcc/optabs-query.h | 4 +-
gcc/tree-vect-data-refs.cc | 45 +++++++++++++----
gcc/tree-vect-stmts.cc | 65 ++++++++++++++++++------
gcc/tree-vectorizer.h | 2 +-
8 files changed, 199 insertions(+), 49 deletions(-)
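As a reading aid (argument names here are descriptive, not taken from the
patch), the GIMPLE call forms the vectorizer builds for the two new IFNs
in the diff below are:

  /* Strided load: scalar base and stride (the stride comes from
     DR_STEP), an else value of zero, then mask/len/bias.  */
  lhs = .MASK_LEN_STRIDED_LOAD (base, stride, zero, mask, len, bias);

  /* Strided store: the stored value is argument 2, matching the new
     internal_fn_stored_value_index case.  */
  .MASK_LEN_STRIDED_STORE (base, stride, stored_value, mask, len, bias);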
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index c7d3564faef..a31a65755c7 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -164,6 +164,7 @@ init_internal_fns ()
#define load_lanes_direct { -1, -1, false }
#define mask_load_lanes_direct { -1, -1, false }
#define gather_load_direct { 3, 1, false }
+#define strided_load_direct { -1, -1, false }
#define len_load_direct { -1, -1, false }
#define mask_len_load_direct { -1, 4, false }
#define mask_store_direct { 3, 2, false }
@@ -172,6 +173,7 @@ init_internal_fns ()
#define vec_cond_mask_direct { 1, 0, false }
#define vec_cond_direct { 2, 0, false }
#define scatter_store_direct { 3, 1, false }
+#define strided_store_direct { 1, 1, false }
#define len_store_direct { 3, 3, false }
#define mask_len_store_direct { 4, 5, false }
#define vec_set_direct { 3, 3, false }
@@ -3561,62 +3563,87 @@ expand_LAUNDER (internal_fn, gcall *call)
expand_assignment (lhs, gimple_call_arg (call, 0), false);
}
+#define expand_strided_store_optab_fn expand_scatter_store_optab_fn
+
/* Expand {MASK_,}SCATTER_STORE{S,U} call CALL using optab OPTAB. */
static void
expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
{
+ insn_code icode;
internal_fn ifn = gimple_call_internal_fn (stmt);
int rhs_index = internal_fn_stored_value_index (ifn);
tree base = gimple_call_arg (stmt, 0);
tree offset = gimple_call_arg (stmt, 1);
- tree scale = gimple_call_arg (stmt, 2);
tree rhs = gimple_call_arg (stmt, rhs_index);
rtx base_rtx = expand_normal (base);
rtx offset_rtx = expand_normal (offset);
- HOST_WIDE_INT scale_int = tree_to_shwi (scale);
rtx rhs_rtx = expand_normal (rhs);
class expand_operand ops[8];
int i = 0;
create_address_operand (&ops[i++], base_rtx);
- create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
- create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
- create_integer_operand (&ops[i++], scale_int);
+ if (internal_strided_fn_p (ifn))
+ {
+ create_address_operand (&ops[i++], offset_rtx);
+ icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)));
+ }
+ else
+ {
+ tree scale = gimple_call_arg (stmt, 2);
+ HOST_WIDE_INT scale_int = tree_to_shwi (scale);
+ create_input_operand (&ops[i++], offset_rtx,
+ TYPE_MODE (TREE_TYPE (offset)));
+ create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
+ create_integer_operand (&ops[i++], scale_int);
+ icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
+ TYPE_MODE (TREE_TYPE (offset)));
+ }
create_input_operand (&ops[i++], rhs_rtx, TYPE_MODE (TREE_TYPE (rhs)));
i = add_mask_and_len_args (ops, i, stmt);
- insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
- TYPE_MODE (TREE_TYPE (offset)));
expand_insn (icode, i, ops);
}
+#define expand_strided_load_optab_fn expand_gather_load_optab_fn
+
/* Expand {MASK_,}GATHER_LOAD call CALL using optab OPTAB. */
static void
expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
{
+ insn_code icode;
+ internal_fn ifn = gimple_call_internal_fn (stmt);
tree lhs = gimple_call_lhs (stmt);
tree base = gimple_call_arg (stmt, 0);
tree offset = gimple_call_arg (stmt, 1);
- tree scale = gimple_call_arg (stmt, 2);
rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
rtx base_rtx = expand_normal (base);
rtx offset_rtx = expand_normal (offset);
- HOST_WIDE_INT scale_int = tree_to_shwi (scale);
int i = 0;
class expand_operand ops[8];
create_output_operand (&ops[i++], lhs_rtx, TYPE_MODE (TREE_TYPE (lhs)));
create_address_operand (&ops[i++], base_rtx);
- create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
- create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
- create_integer_operand (&ops[i++], scale_int);
+ if (internal_strided_fn_p (ifn))
+ {
+ create_address_operand (&ops[i++], offset_rtx);
+ icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)));
+ }
+ else
+ {
+ tree scale = gimple_call_arg (stmt, 2);
+ HOST_WIDE_INT scale_int = tree_to_shwi (scale);
+ create_input_operand (&ops[i++], offset_rtx,
+ TYPE_MODE (TREE_TYPE (offset)));
+ create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
+ create_integer_operand (&ops[i++], scale_int);
+ icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
+ TYPE_MODE (TREE_TYPE (offset)));
+ }
i = add_mask_and_len_args (ops, i, stmt);
- insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
- TYPE_MODE (TREE_TYPE (offset)));
expand_insn (icode, i, ops);
if (!rtx_equal_p (lhs_rtx, ops[0].value))
emit_move_insn (lhs_rtx, ops[0].value);
@@ -4012,6 +4039,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
#define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
#define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
#define direct_gather_load_optab_supported_p convert_optab_supported_p
+#define direct_strided_load_optab_supported_p direct_optab_supported_p
#define direct_len_load_optab_supported_p direct_optab_supported_p
#define direct_mask_len_load_optab_supported_p convert_optab_supported_p
#define direct_mask_store_optab_supported_p convert_optab_supported_p
@@ -4020,6 +4048,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
#define direct_vec_cond_mask_optab_supported_p convert_optab_supported_p
#define direct_vec_cond_optab_supported_p convert_optab_supported_p
#define direct_scatter_store_optab_supported_p convert_optab_supported_p
+#define direct_strided_store_optab_supported_p direct_optab_supported_p
#define direct_len_store_optab_supported_p direct_optab_supported_p
#define direct_mask_len_store_optab_supported_p convert_optab_supported_p
#define direct_while_optab_supported_p convert_optab_supported_p
@@ -4596,6 +4625,7 @@ internal_load_fn_p (internal_fn fn)
case IFN_GATHER_LOAD:
case IFN_MASK_GATHER_LOAD:
case IFN_MASK_LEN_GATHER_LOAD:
+ case IFN_MASK_LEN_STRIDED_LOAD:
case IFN_LEN_LOAD:
case IFN_MASK_LEN_LOAD:
return true;
@@ -4648,6 +4678,22 @@ internal_gather_scatter_fn_p (internal_fn fn)
}
}
+/* Return true if IFN is some form of strided load or strided store. */
+
+bool
+internal_strided_fn_p (internal_fn fn)
+{
+ switch (fn)
+ {
+ case IFN_MASK_LEN_STRIDED_LOAD:
+ case IFN_MASK_LEN_STRIDED_STORE:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
/* If FN takes a vector len argument, return the index of that argument,
otherwise return -1. */
@@ -4683,6 +4729,8 @@ internal_fn_len_index (internal_fn fn)
case IFN_COND_LEN_XOR:
case IFN_COND_LEN_SHL:
case IFN_COND_LEN_SHR:
+ case IFN_MASK_LEN_STRIDED_LOAD:
+ case IFN_MASK_LEN_STRIDED_STORE:
return 4;
case IFN_COND_LEN_NEG:
@@ -4776,6 +4824,10 @@ internal_fn_mask_index (internal_fn fn)
case IFN_MASK_LEN_STORE:
return 2;
+ case IFN_MASK_LEN_STRIDED_LOAD:
+ case IFN_MASK_LEN_STRIDED_STORE:
+ return 3;
+
case IFN_MASK_GATHER_LOAD:
case IFN_MASK_SCATTER_STORE:
case IFN_MASK_LEN_GATHER_LOAD:
@@ -4796,6 +4848,9 @@ internal_fn_stored_value_index (internal_fn fn)
{
switch (fn)
{
+ case IFN_MASK_LEN_STRIDED_STORE:
+ return 2;
+
case IFN_MASK_STORE:
case IFN_MASK_STORE_LANES:
case IFN_SCATTER_STORE:
@@ -4845,6 +4900,24 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
&& insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
}
+/* Return true if the target supports strided load or strided store function
+ IFN. For loads, VECTOR_TYPE is the vector type of the load result,
+ while for stores it is the vector type of the stored data argument.
+ MEMORY_ELEMENT_TYPE is the type of the memory elements being loaded
+ or stored. */
+
+bool
+internal_strided_fn_supported_p (internal_fn ifn, tree vector_type,
+ tree memory_element_type)
+{
+ if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)),
+ TYPE_SIZE (memory_element_type)))
+ return false;
+ optab optab = direct_internal_fn_optab (ifn);
+ insn_code icode = direct_optab_handler (optab, TYPE_MODE (vector_type));
+ return icode != CODE_FOR_nothing;
+}
+
/* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
for pointers of type TYPE when the accesses have LENGTH bytes and their
common byte alignment is ALIGN. */
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index a2023ab9c3d..922359ed5cf 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -199,6 +199,8 @@ DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE,
mask_gather_load, gather_load)
DEF_INTERNAL_OPTAB_FN (MASK_LEN_GATHER_LOAD, ECF_PURE,
mask_len_gather_load, gather_load)
+DEF_INTERNAL_OPTAB_FN (MASK_LEN_STRIDED_LOAD, ECF_PURE,
+ mask_len_strided_load, strided_load)
DEF_INTERNAL_OPTAB_FN (LEN_LOAD, ECF_PURE, len_load, len_load)
DEF_INTERNAL_OPTAB_FN (MASK_LEN_LOAD, ECF_PURE, mask_len_load, mask_len_load)
@@ -208,6 +210,8 @@ DEF_INTERNAL_OPTAB_FN (MASK_SCATTER_STORE, 0,
mask_scatter_store, scatter_store)
DEF_INTERNAL_OPTAB_FN (MASK_LEN_SCATTER_STORE, 0,
mask_len_scatter_store, scatter_store)
+DEF_INTERNAL_OPTAB_FN (MASK_LEN_STRIDED_STORE, 0,
+ mask_len_strided_store, strided_store)
DEF_INTERNAL_OPTAB_FN (MASK_STORE, 0, maskstore, mask_store)
DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 7d72f4db2d0..ea6a7369f23 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -235,12 +235,14 @@ extern bool can_interpret_as_conditional_op_p (gimple *, tree *,
extern bool internal_load_fn_p (internal_fn);
extern bool internal_store_fn_p (internal_fn);
extern bool internal_gather_scatter_fn_p (internal_fn);
+extern bool internal_strided_fn_p (internal_fn);
extern int internal_fn_mask_index (internal_fn);
extern int internal_fn_len_index (internal_fn);
extern int internal_fn_else_index (internal_fn);
extern int internal_fn_stored_value_index (internal_fn);
extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
tree, tree, int);
+extern bool internal_strided_fn_supported_p (internal_fn, tree, tree);
extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
poly_uint64, unsigned int);
#define VECT_PARTIAL_BIAS_UNSUPPORTED 127
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 947ccef218c..860e744995b 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -670,14 +670,18 @@ supports_vec_convert_optab_p (optab op, machine_mode mode)
for at least one vector mode. */
bool
-supports_vec_gather_load_p (machine_mode mode)
+supports_vec_gather_load_p (machine_mode mode, bool strided_p)
{
if (!this_fn_optabs->supports_vec_gather_load[mode])
this_fn_optabs->supports_vec_gather_load[mode]
= (supports_vec_convert_optab_p (gather_load_optab, mode)
- || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
- || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
- ? 1 : -1);
+ || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
+ || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
+ || (strided_p
+ && direct_optab_handler (mask_len_strided_load_optab, mode)
+ != CODE_FOR_nothing)
+ ? 1
+ : -1);
return this_fn_optabs->supports_vec_gather_load[mode] > 0;
}
@@ -687,14 +691,19 @@ supports_vec_gather_load_p (machine_mode mode)
for at least one vector mode. */
bool
-supports_vec_scatter_store_p (machine_mode mode)
+supports_vec_scatter_store_p (machine_mode mode, bool strided_p)
{
if (!this_fn_optabs->supports_vec_scatter_store[mode])
this_fn_optabs->supports_vec_scatter_store[mode]
= (supports_vec_convert_optab_p (scatter_store_optab, mode)
- || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
- || supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode)
- ? 1 : -1);
+ || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
+ || supports_vec_convert_optab_p (mask_len_scatter_store_optab,
+ mode)
+ || (strided_p
+ && direct_optab_handler (mask_len_strided_store_optab, mode)
+ != CODE_FOR_nothing)
+ ? 1
+ : -1);
return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
}
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index 920eb6a1b67..7c22edc5a78 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -191,8 +191,8 @@ bool can_compare_and_swap_p (machine_mode, bool);
bool can_atomic_exchange_p (machine_mode, bool);
bool can_atomic_load_p (machine_mode);
bool lshift_cheap_p (bool);
-bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
-bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
+bool supports_vec_gather_load_p (machine_mode = E_VOIDmode, bool = false);
+bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode, bool = false);
bool can_vec_extract (machine_mode, machine_mode);
/* Version of find_widening_optab_handler_and_mode that operates on
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index d5c9c4a11c2..8662a570417 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3903,7 +3903,8 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
/* Check whether we can use an internal function for a gather load
or scatter store. READ_P is true for loads and false for stores.
- MASKED_P is true if the load or store is conditional. MEMORY_TYPE is
+ MASKED_P is true if the load or store is conditional. STRIDED_P
+ is true if the load or store is a strided access. MEMORY_TYPE is
the type of the memory elements being loaded or stored. OFFSET_TYPE
is the type of the offset that is being applied to the invariant
base address. SCALE is the amount by which the offset should
@@ -3914,8 +3915,8 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
bool
vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
- tree vectype, tree memory_type, tree offset_type,
- int scale, internal_fn *ifn_out,
+ bool strided_p, tree vectype, tree memory_type,
+ tree offset_type, int scale, internal_fn *ifn_out,
tree *offset_vectype_out)
{
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
@@ -3926,7 +3927,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
return false;
/* Work out which function we need. */
- internal_fn ifn, alt_ifn, alt_ifn2;
+ internal_fn ifn, alt_ifn, alt_ifn2, alt_ifn3;
if (read_p)
{
ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
@@ -3935,6 +3936,12 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
use MASK_LEN_GATHER_LOAD regardless whether len and
mask are valid or not. */
alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
+ /* When the target supports MASK_LEN_STRIDED_LOAD, we can relax the
+ restrictions on the relationship of the vector offset type to the
+ loaded data by using a gather load with a strided access.
+ E.g. a "gather" of N bytes with a 64-bit stride would in principle
+ be possible without needing an Nx64-bit vector offset type. */
+ alt_ifn3 = IFN_MASK_LEN_STRIDED_LOAD;
}
else
{
@@ -3944,10 +3951,26 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
use MASK_LEN_SCATTER_STORE regardless whether len and
mask are valid or not. */
alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
+ /* When the target supports MASK_LEN_STRIDED_STORE, we can relax the
+ restrictions on the relationship of the vector offset type to the
+ stored data by using a scatter store with a strided access.
+ E.g. a "scatter" of N bytes with a 64-bit stride would in principle
+ be possible without needing an Nx64-bit vector offset type. */
+ alt_ifn3 = IFN_MASK_LEN_STRIDED_STORE;
}
for (;;)
{
+ /* There is no need to check whether the target supports a gather/scatter
+ IFN with the expected vector offset for a strided access when the
+ target itself supports the strided load/store IFN. */
+ if (strided_p
+ && internal_strided_fn_supported_p (alt_ifn3, vectype, memory_type))
+ {
+ *ifn_out = alt_ifn3;
+ return true;
+ }
+
tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
if (!offset_vectype)
return false;
@@ -4030,6 +4053,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
internal_fn ifn;
tree offset_vectype;
bool masked_p = false;
+ bool strided_p = STMT_VINFO_STRIDED_P (stmt_info);
/* See whether this is already a call to a gather/scatter internal function.
If not, see whether it's a masked load or store. */
@@ -4197,12 +4221,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
supports it for at least some offset type. */
if (use_ifn_p
&& !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
- masked_p, vectype, memory_type,
+ masked_p, strided_p,
+ vectype, memory_type,
signed_char_type_node,
new_scale, &ifn,
&offset_vectype)
&& !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
- masked_p, vectype, memory_type,
+ masked_p, strided_p,
+ vectype, memory_type,
unsigned_char_type_node,
new_scale, &ifn,
&offset_vectype))
@@ -4226,7 +4252,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
&& TREE_CODE (off) == SSA_NAME
&& !POINTER_TYPE_P (TREE_TYPE (off))
&& vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
- masked_p, vectype, memory_type,
+ masked_p, strided_p,
+ vectype, memory_type,
TREE_TYPE (off), scale, &ifn,
&offset_vectype))
break;
@@ -4281,8 +4308,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
if (use_ifn_p)
{
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
- vectype, memory_type, offtype, scale,
- &ifn, &offset_vectype))
+ strided_p, vectype, memory_type, offtype,
+ scale, &ifn, &offset_vectype))
ifn = IFN_LAST;
decl = NULL_TREE;
}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f895aaf3083..907d0c0bcbb 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1515,10 +1515,14 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
internal_fn len_ifn = (is_load
? IFN_MASK_LEN_GATHER_LOAD
: IFN_MASK_LEN_SCATTER_STORE);
- if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
- gs_info->memory_type,
- gs_info->offset_vectype,
- gs_info->scale))
+ if (internal_strided_fn_p (gs_info->ifn)
+ && internal_strided_fn_supported_p (gs_info->ifn, vectype,
+ gs_info->memory_type))
+ vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+ else if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
+ gs_info->memory_type,
+ gs_info->offset_vectype,
+ gs_info->scale))
vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
gs_info->memory_type,
@@ -1703,6 +1707,7 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
no narrower than OFFSET_TYPE. */
tree memory_type = TREE_TYPE (DR_REF (dr));
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
+ STMT_VINFO_STRIDED_P (stmt_info),
vectype, memory_type, offset_type, scale,
&gs_info->ifn, &gs_info->offset_vectype)
|| gs_info->ifn == IFN_LAST)
@@ -1743,6 +1748,15 @@ vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
|| gs_info->ifn == IFN_LAST)
return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
masked_p, gs_info);
+ else if (internal_strided_fn_p (gs_info->ifn))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "using strided IFN for strided/grouped access,"
+ " scale = %d\n",
+ gs_info->scale);
+ return true;
+ }
tree old_offset_type = TREE_TYPE (gs_info->offset);
tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
@@ -3012,6 +3026,14 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
*dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
}
+ /* When the target supports strided load/store, use DR_STEP directly. */
+ if (internal_strided_fn_p (gs_info->ifn))
+ {
+ *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo,
+ unshare_expr (DR_STEP (dr)));
+ return;
+ }
+
/* The offset given in GS_INFO can have pointer type, so use the element
type of the vector instead. */
tree offset_type = TREE_TYPE (gs_info->offset_vectype);
@@ -9130,7 +9152,7 @@ vectorizable_store (vec_info *vinfo,
vec_offset = vec_offsets[j];
tree scale = size_int (gs_info.scale);
- if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
+ if (internal_fn_len_index (gs_info.ifn) >= 0)
{
if (loop_lens)
final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
@@ -9150,10 +9172,18 @@ vectorizable_store (vec_info *vinfo,
gcall *call;
if (final_len && final_mask)
- call = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
- 7, dataref_ptr, vec_offset,
- scale, vec_oprnd, final_mask,
- final_len, bias);
+ {
+ if (internal_strided_fn_p (gs_info.ifn))
+ call
+ = gimple_build_call_internal (IFN_MASK_LEN_STRIDED_STORE,
+ 6, dataref_ptr, vec_offset,
+ vec_oprnd, final_mask,
+ final_len, bias);
+ else
+ call = gimple_build_call_internal (
+ IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr, vec_offset,
+ scale, vec_oprnd, final_mask, final_len, bias);
+ }
else if (final_mask)
call
= gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
@@ -10955,7 +10985,7 @@ vectorizable_load (vec_info *vinfo,
tree zero = build_zero_cst (vectype);
tree scale = size_int (gs_info.scale);
- if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
+ if (internal_fn_len_index (gs_info.ifn) >= 0)
{
if (loop_lens)
final_len
@@ -10978,11 +11008,16 @@ vectorizable_load (vec_info *vinfo,
gcall *call;
if (final_len && final_mask)
- call
- = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
- dataref_ptr, vec_offset,
- scale, zero, final_mask,
- final_len, bias);
+ {
+ if (internal_strided_fn_p (gs_info.ifn))
+ call = gimple_build_call_internal (
+ IFN_MASK_LEN_STRIDED_LOAD, 6, dataref_ptr, vec_offset,
+ zero, final_mask, final_len, bias);
+ else
+ call = gimple_build_call_internal (
+ IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr, vec_offset,
+ scale, zero, final_mask, final_len, bias);
+ }
else if (final_mask)
call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
dataref_ptr, vec_offset,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 254d172231d..4d9e9799470 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2309,7 +2309,7 @@ extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);
extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
-extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
+extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, bool, tree, tree,
tree, int, internal_fn *, tree *);
extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
gather_scatter_info *);