@@ -5245,12 +5245,16 @@ rs6000_density_test (rs6000_cost_data *data)
const int DENSITY_PCT_THRESHOLD = 85;
const int DENSITY_SIZE_THRESHOLD = 70;
const int DENSITY_PENALTY = 10;
+ const int DENSITY_LOAD_PCT_THRESHOLD = 80;
+ const int DENSITY_LOAD_FOR_CTOR_PCT_THRESHOLD = 65;
+ const int DENSITY_LOAD_SIZE_THRESHOLD = 20;
struct loop *loop = data->loop_info;
basic_block *bbs = get_loop_body (loop);
int nbbs = loop->num_nodes;
loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
int vec_cost = data->cost[vect_body], not_vec_cost = 0;
int i, density_pct;
+ unsigned int nload_total = 0, nctor_for_strided = 0, nload_for_ctor = 0;
/* Only care about cost of vector version, so exclude scalar version here. */
if (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) != (void *) data)
@@ -5272,21 +5276,83 @@ rs6000_density_test (rs6000_cost_data *data)
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_IN_PATTERN_P (stmt_info))
not_vec_cost++;
+ else
+ {
+ stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info);
+ if (STMT_VINFO_DATA_REF (vstmt_info)
+ && DR_IS_READ (STMT_VINFO_DATA_REF (vstmt_info)))
+ {
+ if (STMT_VINFO_STRIDED_P (vstmt_info))
+ {
+ unsigned int ncopies = 1;
+ unsigned int nunits = 1;
+ /* TODO: For VMAT_STRIDED_SLP, the number of CTORs can
+ be lower due to group access. Handle it simply here
+ for now. */
+ if (!STMT_SLP_TYPE (vstmt_info))
+ {
+ tree vectype = STMT_VINFO_VECTYPE (vstmt_info);
+ ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ nunits = vect_nunits_for_cost (vectype);
+ }
+ unsigned int nloads = ncopies * nunits;
+ nload_for_ctor += nloads;
+ nload_total += nloads;
+ nctor_for_strided += ncopies;
+ }
+ else
+ nload_total++;
+ }
+ }
}
}
-
free (bbs);
- density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
- if (density_pct > DENSITY_PCT_THRESHOLD
- && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
- {
- data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "density %d%%, cost %d exceeds threshold, penalizing "
- "loop body cost by %d%%", density_pct,
- vec_cost + not_vec_cost, DENSITY_PENALTY);
+ if (vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
+ {
+ density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
+ if (density_pct > DENSITY_PCT_THRESHOLD)
+ {
+ data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "density %d%%, cost %d exceeds threshold, "
+ "penalizing loop body cost by %d%%.\n",
+ density_pct, vec_cost + not_vec_cost,
+ DENSITY_PENALTY);
+ }
+ /* For a loop in which a large proportion of all loads are scalar
+ loads feeding vector construction, a high density means the
+ loads stall more than usual, which in turn delays the vector
+ construction. One typical case is the innermost loop of the
+ hotspot of SPEC2017 503.bwaves_r without loop interchange.
+ Here we price the related vector construction higher and
+ penalize the body cost. */
+ else if (density_pct > DENSITY_LOAD_PCT_THRESHOLD
+ && nload_total > DENSITY_LOAD_SIZE_THRESHOLD)
+ {
+ int load_for_ctor_pct = (nload_for_ctor * 100) / nload_total;
+ /* Large proportion of scalar loads fed to vector CTOR. */
+ if (load_for_ctor_pct > DENSITY_LOAD_FOR_CTOR_PCT_THRESHOLD)
+ {
+ vec_cost += nctor_for_strided;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Found high density loop with a large "
+ "proportion %d%% of scalar loads fed to "
+ "vector ctor, add cost %d.\n",
+ load_for_ctor_pct, nctor_for_strided);
+
+ data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "density %d%%, cost %d exceeds threshold, "
+ "penalizing loop body cost by %d%% for "
+ "load.\n",
+ density_pct, vec_cost + not_vec_cost,
+ DENSITY_PENALTY);
+ }
+ }
}
}