@@ -230,9 +230,24 @@ get_biggest_mode (machine_mode mode1, machine_mode mode2)
return mode1_size >= mode2_size ? mode1 : mode2;
}
+/* Return true if OP is invariant in LOOP: a GIMPLE constant, a default
+   definition, or an SSA name whose defining statement is outside LOOP.  */
+
+static bool
+loop_invariant_op_p (class loop *loop, tree op)
+{
+  if (is_gimple_constant (op) || SSA_NAME_IS_DEFAULT_DEF (op))
+    return true;
+  /* Do not consult gimple_uid here: nothing in this cost model is seen to
+     set its low bit as an invariance marker, so a definition inside LOOP
+     is conservatively treated as variant.  */
+  return !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op)));
+}
+
/* Return true if the variable should be counted into liveness. */
static bool
-variable_vectorized_p (stmt_vec_info stmt_info, tree var, bool lhs_p)
+variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info, tree var,
+ bool lhs_p)
{
if (!var)
return false;
@@ -275,6 +290,10 @@ variable_vectorized_p (stmt_vec_info stmt_info, tree var, bool lhs_p)
|| !tree_fits_shwi_p (var)
|| !IN_RANGE (tree_to_shwi (var), -16, 15)
|| gimple_assign_rhs1 (stmt) != var;
+ case LSHIFT_EXPR:
+ case RSHIFT_EXPR:
+ return gimple_assign_rhs2 (stmt) != var
+ || !loop_invariant_op_p (loop, var);
default:
break;
}
@@ -312,10 +331,12 @@ variable_vectorized_p (stmt_vec_info stmt_info, tree var, bool lhs_p)
The live range of SSA 2 is [0, 4] in bb 3. */
static machine_mode
compute_local_live_ranges (
+ loop_vec_info loop_vinfo,
const hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
{
machine_mode biggest_mode = QImode;
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
if (!program_points_per_bb.is_empty ())
{
auto_vec<tree> visited_vars;
@@ -339,7 +360,8 @@ compute_local_live_ranges (
unsigned int point = program_point.point;
gimple *stmt = program_point.stmt;
tree lhs = gimple_get_lhs (stmt);
- if (variable_vectorized_p (program_point.stmt_info, lhs, true))
+ if (variable_vectorized_p (loop, program_point.stmt_info, lhs,
+ true))
{
biggest_mode = get_biggest_mode (biggest_mode,
TYPE_MODE (TREE_TYPE (lhs)));
@@ -356,7 +378,7 @@ compute_local_live_ranges (
for (i = 0; i < gimple_num_args (stmt); i++)
{
tree var = gimple_arg (stmt, i);
- if (variable_vectorized_p (program_point.stmt_info, var,
+ if (variable_vectorized_p (loop, program_point.stmt_info, var,
false))
{
biggest_mode
@@ -781,7 +803,8 @@ has_unexpected_spills_p (loop_vec_info loop_vinfo)
/* Compute local live ranges. */
hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
machine_mode biggest_mode
- = compute_local_live_ranges (program_points_per_bb, live_ranges_per_bb);
+ = compute_local_live_ranges (loop_vinfo, program_points_per_bb,
+ live_ranges_per_bb);
/* Update live ranges according to PHI. */
update_local_live_ranges (loop_vinfo, program_points_per_bb,
new file mode 100644
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */
+
+void
+f (int *restrict a, int *restrict b, int *restrict c, int *restrict d,
+ int *restrict x, int n)
+{ /* Right shift whose amount is loaded from x[i] on every iteration.  */
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = b[i] >> x[i];
+ int tmp2 = tmp * b[i];
+ c[i] = tmp2 * b[i];
+ d[i] = tmp * tmp2 * b[i] >> x[i]; /* The whole product is shifted.  */
+ }
+}
+
+void
+f2 (int *restrict a, int *restrict b, int *restrict c, int *restrict d,
+ int *restrict x, int n)
+{ /* Like f, but tmp is formed with a left shift by x[i].  */
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = b[i] << x[i];
+ int tmp2 = tmp * b[i];
+ c[i] = tmp2 * b[i];
+ d[i] = tmp * tmp2 * b[i] >> x[i]; /* Still a right shift of the product.  */
+ }
+}
+
+/* { dg-final { scan-assembler-times {e32,m4} 2 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-not {jr} } } */
+/* { dg-final { scan-assembler-not {e32,m8} } } */
+/* { dg-final { scan-assembler-not {e32,m2} } } */
+/* { dg-final { scan-assembler-not {e32,m1} } } */
+/* { dg-final { scan-assembler-times {ret} 2 } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 2 "vect" } } */
new file mode 100644
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */
+
+void
+f (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int x,
+ int n)
+{ /* Right shift by the scalar parameter x, which the loop never writes.  */
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = b[i] >> x;
+ int tmp2 = tmp * b[i];
+ c[i] = tmp2 * b[i];
+ d[i] = tmp * tmp2 * b[i] >> x; /* The whole product is shifted.  */
+ }
+}
+
+void
+f2 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int x,
+ int n)
+{ /* Like f, but tmp is formed with a left shift by the scalar x.  */
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = b[i] << x;
+ int tmp2 = tmp * b[i];
+ c[i] = tmp2 * b[i];
+ d[i] = tmp * tmp2 * b[i] >> x; /* Still a right shift of the product.  */
+ }
+}
+
+void
+f3 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{ /* Right shift by the literal constant 17.  */
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = b[i] >> 17;
+ int tmp2 = tmp * b[i];
+ c[i] = tmp2 * b[i];
+ d[i] = tmp * tmp2 * b[i] >> 17; /* The whole product is shifted.  */
+ }
+}
+
+void
+f4 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{ /* Like f3, but tmp is formed with a left shift by 17.  */
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = b[i] << 17;
+ int tmp2 = tmp * b[i];
+ c[i] = tmp2 * b[i];
+ d[i] = tmp * tmp2 * b[i] >> 17; /* Still a right shift of the product.  */
+ }
+}
+
+/* { dg-final { scan-assembler-times {e32,m8} 4 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-not {jr} } } */
+/* { dg-final { scan-assembler-not {e32,m4} } } */
+/* { dg-final { scan-assembler-not {e32,m2} } } */
+/* { dg-final { scan-assembler-not {e32,m1} } } */
+/* { dg-final { scan-assembler-times {ret} 4 } } */
+/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 4 "vect" } } */