@@ -1173,6 +1173,12 @@ expand_vector_init_trailing_same_elem (rtx target,
static void
expand_const_vector (rtx target, rtx src)
{
+#ifdef ENABLE_RTL_CHECKING
+ riscv_const_expect_p* expected_pattern = NULL;
+ if (EXPECTED_CONST_PATTERN)
+ expected_pattern = get_expected_costed_type (EXPECTED_CONST_PATTERN, src);
+#endif
+
machine_mode mode = GET_MODE (target);
rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
rtx elt;
@@ -1180,6 +1186,10 @@ expand_const_vector (rtx target, rtx src)
{
if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
{
+#ifdef ENABLE_RTL_CHECKING
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_DUPLICATE_BOOL);
+#endif
gcc_assert (rtx_equal_p (elt, const0_rtx)
|| rtx_equal_p (elt, const1_rtx));
rtx ops[] = {result, src};
@@ -1189,11 +1199,20 @@ expand_const_vector (rtx target, rtx src)
we use vmv.v.i instruction. */
else if (valid_vec_immediate_p (src))
{
+#ifdef ENABLE_RTL_CHECKING
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_DUPLICATE_VMV_VI);
+#endif
rtx ops[] = {result, src};
emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
}
else
{
+#ifdef ENABLE_RTL_CHECKING
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_DUPLICATE_INT_FP);
+#endif
+
/* Emit vec_duplicate<mode> split pattern before RA so that
we could have a better optimization opportunity in LICM
which will hoist vmv.v.x outside the loop and in fwprop && combine
@@ -1230,6 +1249,10 @@ expand_const_vector (rtx target, rtx src)
rtx base, step;
if (const_vec_series_p (src, &base, &step))
{
+#ifdef ENABLE_RTL_CHECKING
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_SERIES);
+#endif
expand_vec_series (result, base, step);
if (result != target)
@@ -1323,6 +1346,10 @@ expand_const_vector (rtx target, rtx src)
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
if (builder.single_step_npatterns_p ())
{
+#ifdef ENABLE_RTL_CHECKING
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_PATTERN_SINGLE_STEP);
+#endif
/* Describe the case by choosing NPATTERNS = 4 as an example. */
insn_code icode;
@@ -1462,6 +1489,10 @@ expand_const_vector (rtx target, rtx src)
}
else if (builder.interleaved_stepped_npatterns_p ())
{
+#ifdef ENABLE_RTL_CHECKING
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_PATTERN_INTERLEAVED);
+#endif
rtx base1 = builder.elt (0);
rtx base2 = builder.elt (1);
poly_int64 step1
@@ -1564,6 +1595,13 @@ expand_const_vector (rtx target, rtx src)
if (emit_catch_all_pattern)
{
+#ifdef ENABLE_RTL_CHECKING
+ /* Ensure the vector cost emitted by riscv_const_insns expected this
+ pattern to be handled by the catch all pattern. */
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_CATCH_ALL);
+#endif
+
int nelts = XVECLEN (src, 0);
/* Optimize trailing same elements sequence:
@@ -1575,6 +1613,15 @@ expand_const_vector (rtx target, rtx src)
to/reading from the stack to initialize vectors. */
expand_vector_init_insert_elems (result, builder, nelts);
}
+ else
+ {
+#ifdef ENABLE_RTL_CHECKING
+ /* Ensure the vector cost emitted by riscv_const_insns expected this
+ pattern to be handled by an optimized pattern. */
+ if (expected_pattern)
+ gcc_assert (*expected_pattern != RVV_CATCH_ALL);
+#endif
+ }
if (result != target)
emit_move_insn (target, result);
@@ -24,6 +24,74 @@
#include "rtx-vector-builder.h"
+#ifdef ENABLE_RTL_CHECKING
+#include "hash-map.h"
+
+typedef enum
+{
+ RVV_DUPLICATE_BOOL,
+ RVV_DUPLICATE_VMV_VI,
+ RVV_DUPLICATE_INT_FP,
+ RVV_SERIES,
+ RVV_PATTERN_SINGLE_STEP,
+ RVV_PATTERN_INTERLEAVED,
+ RVV_CATCH_ALL,
+} riscv_const_expect_p;
+
+extern hash_map<rtx, vec<std::pair<const_rtx, riscv_const_expect_p>>> *EXPECTED_CONST_PATTERN;
+
+static bool insert_expected_pattern (hash_map<rtx, vec<std::pair<const_rtx, riscv_const_expect_p>>> *map,
+ rtx x, riscv_const_expect_p pattern)
+{
+ if (!map)
+ return false;
+
+ vec<std::pair<const_rtx, riscv_const_expect_p>>* expected_patterns = map->get (x);
+
+ if (expected_patterns)
+ {
+ // We already have an entry for this hash
+ for (std::pair<const_rtx, riscv_const_expect_p> expected : *expected_patterns)
+ if (expected.first == x)
+ // Duplicate costing, ignore.
+ return true;
+
+ expected_patterns->safe_push (std::make_pair(x, pattern));
+ }
+ else
+ {
+ // Create a vec to hold the entry
+ vec<std::pair<const_rtx, riscv_const_expect_p>> new_expected_patterns = vNULL;
+ new_expected_patterns.safe_push (std::make_pair(x, pattern));
+
+ // Update map's vec to non-null value
+ map->put (x, new_expected_patterns);
+ }
+
+ return true;
+}
+
+static riscv_const_expect_p* get_expected_costed_type(hash_map<rtx, vec<std::pair<const_rtx, riscv_const_expect_p>>> *map,
+ rtx x)
+{
+ if (!map)
+ return NULL;
+
+ vec<std::pair<const_rtx, riscv_const_expect_p>>* expected_patterns = map->get (x);
+ if (!expected_patterns)
+ return NULL;
+
+ // Iterate over all hash collisions
+ for (std::pair<const_rtx, riscv_const_expect_p> expected : *expected_patterns)
+ {
+ if (expected.first == x)
+ return &expected.second;
+ }
+
+ return NULL;
+}
+#endif /* ENABLE_RTL_CHECKING */
+
using namespace riscv_vector;
namespace riscv_vector {
@@ -681,6 +681,12 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
function. */
static bool riscv_save_frame_pointer;
+#ifdef ENABLE_RTL_CHECKING
+/* Global variable used in riscv-v.cc to ensure accurate costs are emitted
+ for constant vectors. */
+hash_map<rtx, vec<std::pair<const_rtx, riscv_const_expect_p>>> *EXPECTED_CONST_PATTERN = NULL;
+#endif
+
typedef enum
{
PUSH_IDX = 0,
@@ -2131,6 +2137,13 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
case CONST_VECTOR:
{
+#ifdef ENABLE_RTL_CHECKING
+ /* Used to assert we aren't mislabeling optimized/fallthrough
+ patterns and are emitting accurate costs. */
+ if (!EXPECTED_CONST_PATTERN)
+ EXPECTED_CONST_PATTERN = new hash_map<rtx, vec<std::pair<const_rtx, riscv_const_expect_p>>>;
+#endif
+
/* TODO: This is not accurate, we will need to
adapt the COST of CONST_VECTOR in the future
for the following cases:
@@ -2148,8 +2161,13 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
if (const_vec_duplicate_p (x, &elt))
{
if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
- /* Duplicate values of 0/1 can be emitted using vmv.v.i. */
- return 1;
+ {
+#ifdef ENABLE_RTL_CHECKING
+ insert_expected_pattern (EXPECTED_CONST_PATTERN, x, RVV_DUPLICATE_BOOL);
+#endif
+ /* Duplicate values of 0/1 can be emitted using vmv.v.i. */
+ return 1;
+ }
/* We don't allow CONST_VECTOR for DI vector on RV32
system since the ELT constant value can not held
@@ -2162,13 +2180,21 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
/* Constants in range -16 ~ 15 integer or 0.0 floating-point
can be emitted using vmv.v.i. */
if (valid_vec_immediate_p (x))
- return 1;
+ {
+#ifdef ENABLE_RTL_CHECKING
+ insert_expected_pattern (EXPECTED_CONST_PATTERN, x, RVV_DUPLICATE_VMV_VI);
+#endif
+ return 1;
+ }
/* Any int/FP constants can always be broadcast from a
scalar register. Loading of a floating-point
constant incurs a literal-pool access. Allow this in
order to increase vectorization possibilities. */
int n = riscv_const_insns (elt, allow_new_pseudos);
+#ifdef ENABLE_RTL_CHECKING
+ insert_expected_pattern (EXPECTED_CONST_PATTERN, x, RVV_DUPLICATE_INT_FP);
+#endif
if (CONST_DOUBLE_P (elt))
return 1 + 4; /* vfmv.v.f + memory access. */
else
@@ -2186,6 +2212,9 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
rtx base, step;
if (const_vec_series_p (x, &base, &step))
{
+#ifdef ENABLE_RTL_CHECKING
+ insert_expected_pattern (EXPECTED_CONST_PATTERN, x, RVV_SERIES);
+#endif
/* This cost is not accurate, we will need to adapt the COST
accurately according to BASE && STEP. */
return 1;
@@ -2216,6 +2245,9 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
if (builder.single_step_npatterns_p ())
{
+#ifdef ENABLE_RTL_CHECKING
+ insert_expected_pattern (EXPECTED_CONST_PATTERN, x, RVV_PATTERN_SINGLE_STEP);
+#endif
if (builder.npatterns_all_equal_p ())
{
/* TODO: This cost is not accurate. */
@@ -2229,6 +2261,9 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
}
else if (builder.interleaved_stepped_npatterns_p ())
{
+#ifdef ENABLE_RTL_CHECKING
+ insert_expected_pattern (EXPECTED_CONST_PATTERN, x, RVV_PATTERN_INTERLEAVED);
+#endif
/* TODO: This cost is not accurate. */
return 1;
}
@@ -2257,6 +2292,10 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
/* Our vslide1up/down insn def does not handle HF. */
return 0;
+#ifdef ENABLE_RTL_CHECKING
+ insert_expected_pattern (EXPECTED_CONST_PATTERN, x, RVV_CATCH_ALL);
+#endif
+
/* We already checked for a fully const vector above. Calculate
the number of leading/trailing elements covered by the splat. */
int leading_ndups = 1;
This patch adds some advanced checking to assert that the emitted costs match emitted patterns for const_vecs. Flow: Costing: Insert into hashmap<rtx, vec<(const_rtx, enum)>> Expand: Check for membership in hashmap -> Not in hashmap: ignore, this wasn't costed -> In hashmap: Iterate over vec -> if RTX not in hashmap: Ignore, this wasn't costed (hash collision) -> if RTX in hashmap: Assert enum is expected There are no false positive asserts with this flow. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_const_vector): Add RTL_CHECKING gated asserts. * config/riscv/riscv.cc (riscv_const_insns): Ditto. * config/riscv/riscv-v.h (insert_expected_pattern): Add helper function to insert hash collisions into hash map vec key. (get_expected_costed_type): Add helper function to get the expected cost type for a given rtx pattern. Signed-off-by: Patrick O'Neill <patrick@rivosinc.com> --- Was rfc: https://inbox.sourceware.org/gcc-patches/054f4f37-9615-4e01-940e-0cf4d188fcdb@gmail.com/T/#t While I think it's extremely valuable I'd be open to dropping it if there's strong opposition to it. I'm not sure how often people run with checking enabled but this seems likely to bitrot if the answer is not often. Maybe a sign to set up some weekly rtl-checking postcommit runs? With this patch (without the ifdefs) the riscv rv64gcv testsuite on a 32 core machine took: 36689.15s user 7398.57s system 2751% cpu 26:42.05 total max memory: 844 MB Without this patch: 35510.99s user 7157.93s system 2772% cpu 25:39.21 total max memory: 844 MB The hash map is never explicitly freed by GCC. --- gcc/config/riscv/riscv-v.cc | 47 +++++++++++++++++++++++++ gcc/config/riscv/riscv-v.h | 68 +++++++++++++++++++++++++++++++++++++ gcc/config/riscv/riscv.cc | 45 ++++++++++++++++++++++-- 3 files changed, 157 insertions(+), 3 deletions(-) -- 2.34.1