@@ -95,6 +95,10 @@ public:
void execute ();
private:
+ // Whether to test only things that are required for correctness,
+ // or whether to take optimization heuristics into account as well.
+ enum test_strictness { CORRECTNESS_ONLY, ALL_REASONS };
+
static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
using operand_mask = uint32_t;
@@ -452,7 +456,7 @@ private:
template<unsigned int allocno_info::*field>
static int cmp_increasing (const void *, const void *);
- bool is_chain_candidate (allocno_info *, allocno_info *);
+ bool is_chain_candidate (allocno_info *, allocno_info *, test_strictness);
int rate_chain (allocno_info *, allocno_info *);
static int cmp_chain_candidates (const void *, const void *);
void chain_allocnos (unsigned int &, unsigned int &);
@@ -1588,7 +1592,7 @@ early_ra::find_related_start (allocno_info *dest_allocno,
return res;
auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
- if (!is_chain_candidate (dest_allocno, next_allocno))
+ if (!is_chain_candidate (dest_allocno, next_allocno, ALL_REASONS))
return res;
dest_allocno = next_allocno;
@@ -2011,7 +2015,7 @@ early_ra::strided_polarity_pref (allocno_info *allocno1,
if (allocno1->offset + 1 < allocno1->group_size
&& allocno2->offset + 1 < allocno2->group_size)
{
- if (is_chain_candidate (allocno1 + 1, allocno2 + 1))
+ if (is_chain_candidate (allocno1 + 1, allocno2 + 1, ALL_REASONS))
return 1;
else
return -1;
@@ -2019,7 +2023,7 @@ early_ra::strided_polarity_pref (allocno_info *allocno1,
if (allocno1->offset > 0 && allocno2->offset > 0)
{
- if (is_chain_candidate (allocno1 - 1, allocno2 - 1))
+ if (is_chain_candidate (allocno1 - 1, allocno2 - 1, ALL_REASONS))
return 1;
else
return -1;
@@ -2215,38 +2219,37 @@ early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
}
// Return true if we should consider chaining ALLOCNO1 onto the head
-// of ALLOCNO2. This is just a local test of the two allocnos; it doesn't
-// guarantee that chaining them would give a self-consistent system.
+// of ALLOCNO2. STRICTNESS says whether we should take copy-elision
+// heuristics into account, or whether we should just consider things
+// that matter for correctness.
+//
+// This is just a local test of the two allocnos; it doesn't guarantee
+// that chaining them would give a self-consistent system.
bool
-early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2)
+early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2,
+ test_strictness strictness)
{
if (allocno2->is_shared ())
return false;
- if (allocno1->is_equiv)
+ while (allocno1->is_equiv)
allocno1 = m_allocnos[allocno1->related_allocno];
if (allocno2->start_point >= allocno1->end_point
&& !allocno2->is_equiv_to (allocno1->id))
return false;
- if (allocno2->is_strong_copy_dest)
- {
- if (!allocno1->is_strong_copy_src
- || allocno1->copy_dest != allocno2->id)
- return false;
- }
- else if (allocno2->is_copy_dest)
+ if (allocno1->is_earlyclobbered
+ && allocno1->end_point == allocno2->start_point + 1)
+ return false;
+
+ if (strictness == ALL_REASONS && allocno2->is_copy_dest)
{
if (allocno1->copy_dest != allocno2->id)
return false;
- }
- else if (allocno1->is_earlyclobbered)
- {
- if (allocno1->end_point == allocno2->start_point + 1)
+ if (allocno2->is_strong_copy_dest && !allocno1->is_strong_copy_src)
return false;
}
-
return true;
}
@@ -2470,8 +2473,7 @@ early_ra::try_to_chain_allocnos (allocno_info *allocno1,
auto *head2 = m_allocnos[headi2];
if (head1->chain_next != INVALID_ALLOCNO)
return false;
- if (!head2->is_equiv_to (head1->id)
- && head1->end_point <= head2->start_point)
+ if (!is_chain_candidate (head1, head2, CORRECTNESS_ONLY))
return false;
}
}
@@ -2620,7 +2622,7 @@ early_ra::form_chains ()
auto *allocno2 = m_sorted_allocnos[sci];
if (allocno2->chain_prev == INVALID_ALLOCNO)
{
- if (!is_chain_candidate (allocno1, allocno2))
+ if (!is_chain_candidate (allocno1, allocno2, ALL_REASONS))
continue;
chain_candidate_info candidate;
candidate.allocno = allocno2;
new file mode 100644
@@ -0,0 +1,57 @@
+// { dg-do run }
+// { dg-options "-O2" }
+
+#include <arm_neon.h>
+
+void __attribute__ ((noinline))
+foo (int8_t **ptr)
+{
+ int8x16_t v0 = vld1q_s8 (ptr[0]);
+ int8x16_t v1 = vld1q_s8 (ptr[1]);
+ int8x16_t v2 = vld1q_s8 (ptr[2]);
+ int8x16_t v3 = vld1q_s8 (ptr[3]);
+ int8x16_t v4 = vld1q_s8 (ptr[4]);
+
+ int8x16x4_t res0 = { v0, v1, v2, v3 };
+ vst4q_s8 (ptr[5], res0);
+
+ int8x16_t add = vaddq_s8 (v2, v3);
+ int8x16x3_t res1 = { v1, add, v3 };
+ vst3q_s8 (ptr[6], res1);
+
+ int8x16x3_t res2 = { v0, v1, v2 };
+ vst3q_s8 (ptr[7], res2);
+}
+
+int8_t arr0[16] = { 1 };
+int8_t arr1[16] = { 2 };
+int8_t arr2[16] = { 4 };
+int8_t arr3[16] = { 8 };
+int8_t arr4[16] = { 16 };
+int8_t arr5[16 * 4];
+int8_t arr6[16 * 3];
+int8_t arr7[16 * 3];
+int8_t *ptr[] =
+{
+ arr0,
+ arr1,
+ arr2,
+ arr3,
+ arr4,
+ arr5,
+ arr6,
+ arr7
+};
+
+int
+main (void)
+{
+ foo (ptr);
+ if (arr5[0] != 1 || arr5[1] != 2 || arr5[2] != 4 || arr5[3] != 8)
+ __builtin_abort ();
+ if (arr6[0] != 2 || arr6[1] != 12 || arr6[2] != 8)
+ __builtin_abort ();
+ if (arr7[0] != 1 || arr7[1] != 2 || arr7[2] != 4)
+ __builtin_abort ();
+ return 0;
+}