new file mode 100644
@@ -0,0 +1,106 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+void __attribute__((noinline)) /* Kernel: c[i] = a[i] + b[i], SIZE iterations.  */
+test_citer (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c)
+{
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: the bound is the compile-time constant SIZE.  */
+ for (i = 0; i < SIZE; i++)
+ c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline)) /* Kernel: c[i] = a[i] + b[i], `size' iterations.  */
+test_viter (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: the bound is the `size' argument.  */
+ for (i = 0; i < size; i++)
+ c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline)) /* Fills a, b, c and plants a guard at [size].  */
+init_data (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ for (int i = 0; i < size; i++)
+ {
+ a[i] = i;
+ b[i] = -i;
+ c[i] = 0;
+ asm volatile("": : :"memory"); /* Memory clobber; presumably keeps this loop unvectorized.  */
+ }
+ a[size] = b[size] = c[size] = size; /* Guard element one past the data.  */
+}
+
+
+void __attribute__((noinline)) /* Allocates the arrays, runs both kernels, checks results.  */
+run_test ()
+{
+ int *a;
+ int *b;
+ int *c;
+ int i;
+ /* SIZE + 1 elements each: slot [SIZE] receives init_data's guard value.  */
+ if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ /* Constant-bound kernel: check every sum, then that the guards survived.  */
+ init_data (a, b, c, SIZE);
+ test_citer (a, b, c);
+ for (i = 0; i < SIZE; i++)
+ if (c[i] != a[i] + b[i])
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+ /* Same checks for the kernel that takes its bound as an argument.  */
+ init_data (a, b, c, SIZE);
+ test_viter (a, b, c, SIZE);
+ for (i = 0; i < SIZE; i++)
+ if (c[i] != a[i] + b[i])
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+
+ free (a);
+ free (b);
+ free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+ /* Run only when the weak posix_memalign symbol resolved to non-null.  */
+ if (posix_memalign)
+ run_test ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,134 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); /* Weak, so main's null test can detect a missing symbol.  */
+extern void free (void *);
+
+void __attribute__((noinline)) /* Kernel: b[i] = a[i] + c[i] where a[i] > 0, SIZE iterations.  */
+test_citer (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c)
+{
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: conditional store, compile-time bound SIZE.  */
+ for (i = 0; i < SIZE; i++)
+ if (a[i] > 0)
+ b[i] = a[i] + c[i];
+}
+
+void __attribute__((noinline)) /* Kernel: b[i] = a[i] + c[i] where a[i] > 0, `size' iterations.  */
+test_viter (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: conditional store, bound is the `size' argument.  */
+ for (i = 0; i < size; i++)
+ if (a[i] > 0)
+ b[i] = a[i] + c[i];
+}
+
+void __attribute__((noinline)) /* Fills the arrays by index parity; plants a guard at [size].  */
+init_data (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ if (i % 2) /* Odd index: a[i] is positive.  */
+ {
+ a[i] = i;
+ b[i] = 0;
+ c[i] = 2 * i;
+ }
+ else /* Even index: a[i] is zero or negative and b[i] holds i.  */
+ {
+ a[i] = -i;
+ b[i] = i;
+ c[i] = 0;
+ }
+ asm volatile("": : :"memory"); /* Memory clobber; presumably keeps this loop unvectorized.  */
+ }
+ a[size] = b[size] = c[size] = size; /* Guard element one past the data.  */
+}
+
+
+void __attribute__((noinline)) /* Allocates the arrays, runs both kernels, checks results.  */
+run_test ()
+{
+ int *a;
+ int *b;
+ int *c;
+ int i;
+ /* SIZE + 1 elements each: slot [SIZE] receives init_data's guard value.  */
+ if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ /* Constant-bound kernel: stored where a[i] > 0, untouched (== i) elsewhere.  */
+ init_data (a, b, c, SIZE);
+ test_citer (a, b, c);
+ for (i = 0; i < SIZE; i++)
+ if (a[i] > 0)
+ {
+ if (b[i] != a[i] + c[i])
+ __builtin_abort ();
+ }
+ else
+ {
+ if (b[i] != i)
+ __builtin_abort ();
+ }
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+ /* Same checks for the kernel that takes its bound as an argument.  */
+ init_data (a, b, c, SIZE);
+ test_viter (a, b, c, SIZE);
+ for (i = 0; i < SIZE; i++)
+ if (a[i] > 0)
+ {
+ if (b[i] != a[i] + c[i])
+ __builtin_abort ();
+ }
+ else
+ {
+ if (b[i] != i)
+ __builtin_abort ();
+ }
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+
+ free (a);
+ free (b);
+ free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+ /* Run only when posix_memalign's address is non-null.  */
+ if (posix_memalign)
+ run_test ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,111 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); /* Weak, so main's null test can detect a missing symbol.  */
+extern void free (void *);
+
+int __attribute__((noinline)) /* Returns the sum of a[i] + b[i] * c[i] over SIZE elements.  */
+test_citer (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c)
+{
+ int res = 0;
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: reduction into res, compile-time bound SIZE.  */
+ for (i = 0; i < SIZE; i++)
+ res += a[i] + b[i] * c[i];
+
+ return res;
+}
+
+int __attribute__((noinline)) /* Returns the sum of a[i] + b[i] * c[i] over `size' elements.  */
+test_viter (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ int res = 0;
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: reduction into res, bound is the `size' argument.  */
+ for (i = 0; i < size; i++)
+ res += a[i] + b[i] * c[i];
+
+ return res;
+}
+
+void __attribute__((noinline)) /* Fills a, b, c and plants a guard at [size].  */
+init_data (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ a[i] = i; /* a[i] + b[i] * c[i] is i + (-i) * 1, i.e. 0 for every element.  */
+ b[i] = -i;
+ c[i] = 1;
+ asm volatile("": : :"memory"); /* Memory clobber; presumably keeps this loop unvectorized.  */
+ }
+ a[size] = b[size] = c[size] = size; /* Guard element one past the data.  */
+}
+
+
+void __attribute__((noinline)) /* Allocates the arrays, runs both kernels, checks results.  */
+run_test ()
+{
+ int *a;
+ int *b;
+ int *c;
+ int res;
+ /* SIZE + 1 elements each: slot [SIZE] receives init_data's guard value.  */
+ if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ /* Constant-bound kernel: the sum must be 0 and the guards unchanged.  */
+ init_data (a, b, c, SIZE);
+ res = test_citer (a, b, c);
+ if (res != 0)
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+ /* Same checks for the kernel that takes its bound as an argument.  */
+ init_data (a, b, c, SIZE);
+ res = test_viter (a, b, c, SIZE);
+ if (res != 0)
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+
+ free (a);
+ free (b);
+ free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+ /* Run only when posix_memalign's address is non-null.  */
+ if (posix_memalign)
+ run_test ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,122 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); /* Weak, so main's null test can detect a missing symbol.  */
+extern void free (void *);
+
+int __attribute__((noinline)) /* Returns the sum of b[i] + c[i] where a[i] > 0, SIZE elements.  */
+test_citer (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c)
+{
+ int res = 0;
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: conditional reduction, compile-time bound SIZE.  */
+ for (i = 0; i < SIZE; i++)
+ if (a[i] > 0)
+ res += b[i] + c[i];
+
+ return res;
+}
+
+int __attribute__((noinline)) /* Returns the sum of b[i] + c[i] where a[i] > 0, `size' elements.  */
+test_viter (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ int res = 0;
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: conditional reduction, bound is the `size' argument.  */
+ for (i = 0; i < size; i++)
+ if (a[i] > 0)
+ res += b[i] + c[i];
+
+ return res;
+}
+
+void __attribute__((noinline)) /* Fills the arrays by index parity; plants a guard at [size].  */
+init_data (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ if (i % 2) /* Odd index: a[i] is positive and b[i] + c[i] is 0.  */
+ {
+ a[i] = i;
+ b[i] = -i*2;
+ c[i] = i*2;
+ }
+ else /* Even index: a[i] is zero or negative and b[i] + c[i] is i + 10.  */
+ {
+ a[i] = -i;
+ b[i] = i;
+ c[i] = 10;
+ }
+ asm volatile("": : :"memory"); /* Memory clobber; presumably keeps this loop unvectorized.  */
+ }
+ a[size] = b[size] = c[size] = size; /* Guard element one past the data.  */
+}
+
+
+void __attribute__((noinline)) /* Allocates the arrays, runs both kernels, checks results.  */
+run_test ()
+{
+ int *a;
+ int *b;
+ int *c;
+ int res;
+ /* SIZE + 1 elements each: slot [SIZE] receives init_data's guard value.  */
+ if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ /* Constant-bound kernel: the sum must be 0 and the guards unchanged.  */
+ init_data (a, b, c, SIZE);
+ res = test_citer (a, b, c);
+ if (res != 0)
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+ /* Same checks for the kernel that takes its bound as an argument.  */
+ init_data (a, b, c, SIZE);
+ res = test_viter (a, b, c, SIZE);
+ if (res != 0)
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+
+ free (a);
+ free (b);
+ free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+ /* Run only when posix_memalign's address is non-null.  */
+ if (posix_memalign)
+ run_test ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); /* Weak, so main's null test can detect a missing symbol.  */
+extern void free (void *);
+
+void __attribute__((noinline)) /* Kernel: c[i] = a[i] + b[i], SIZE iterations, long long index.  */
+test_citer (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c)
+{
+ long long i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: the induction variable is wider than the int elements.  */
+ for (i = 0; i < SIZE; i++)
+ c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline)) /* Kernel: c[i] = a[i] + b[i], `size' iterations, long long index.  */
+test_viter (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ long long i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: long long index compared against the int `size'.  */
+ for (i = 0; i < size; i++)
+ c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline)) /* Fills a, b, c and plants a guard at [size].  */
+init_data (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ a[i] = i;
+ b[i] = -i;
+ c[i] = 0;
+ asm volatile("": : :"memory"); /* Memory clobber; presumably keeps this loop unvectorized.  */
+ }
+ a[size] = b[size] = c[size] = size; /* Guard element one past the data.  */
+}
+
+
+void __attribute__((noinline)) /* Allocates the arrays, runs both kernels, checks results.  */
+run_test ()
+{
+ int *a;
+ int *b;
+ int *c;
+ long long i;
+ /* SIZE + 1 elements each: slot [SIZE] receives init_data's guard value.  */
+ if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ /* Constant-bound kernel: check every sum, then that the guards survived.  */
+ init_data (a, b, c, SIZE);
+ test_citer (a, b, c);
+ for (i = 0; i < SIZE; i++)
+ if (c[i] != a[i] + b[i])
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+ /* Same checks for the kernel that takes its bound as an argument.  */
+ init_data (a, b, c, SIZE);
+ test_viter (a, b, c, SIZE);
+ for (i = 0; i < SIZE; i++)
+ if (c[i] != a[i] + b[i])
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+
+ free (a);
+ free (b);
+ free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+ /* Run only when posix_memalign's address is non-null.  */
+ if (posix_memalign)
+ run_test ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); /* Weak, so main's null test can detect a missing symbol.  */
+extern void free (void *);
+
+void __attribute__((noinline)) /* Kernel: c[i] = a[i] + b[i] on long long data, SIZE iterations.  */
+test_citer (long long * __restrict__ a,
+ long long * __restrict__ b,
+ long long * __restrict__ c)
+{
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (long long *)__builtin_assume_aligned (a, ALIGN);
+ b = (long long *)__builtin_assume_aligned (b, ALIGN);
+ c = (long long *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: int index over long long elements, bound SIZE.  */
+ for (i = 0; i < SIZE; i++)
+ c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline)) /* Kernel: c[i] = a[i] + b[i] on long long data, `size' iterations.  */
+test_viter (long long * __restrict__ a,
+ long long * __restrict__ b,
+ long long * __restrict__ c,
+ int size)
+{
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (long long *)__builtin_assume_aligned (a, ALIGN);
+ b = (long long *)__builtin_assume_aligned (b, ALIGN);
+ c = (long long *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: int index over long long elements, bound `size'.  */
+ for (i = 0; i < size; i++)
+ c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline)) /* Fills a, b, c and plants a guard at [size].  */
+init_data (long long * __restrict__ a,
+ long long * __restrict__ b,
+ long long * __restrict__ c,
+ int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ a[i] = i;
+ b[i] = -i;
+ c[i] = 0;
+ asm volatile("": : :"memory"); /* Memory clobber; presumably keeps this loop unvectorized.  */
+ }
+ a[size] = b[size] = c[size] = size; /* Guard element one past the data.  */
+}
+
+
+void __attribute__((noinline)) /* Allocates the arrays, runs both kernels, checks results.  */
+run_test ()
+{
+ long long *a;
+ long long *b;
+ long long *c;
+ int i;
+ /* SIZE + 1 elements each: slot [SIZE] receives init_data's guard value.  */
+ if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+ return;
+ if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+ return;
+ if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+ return;
+ /* Constant-bound kernel: check every sum, then that the guards survived.  */
+ init_data (a, b, c, SIZE);
+ test_citer (a, b, c);
+ for (i = 0; i < SIZE; i++)
+ if (c[i] != a[i] + b[i])
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+ /* Same checks for the kernel that takes its bound as an argument.  */
+ init_data (a, b, c, SIZE);
+ test_viter (a, b, c, SIZE);
+ for (i = 0; i < SIZE; i++)
+ if (c[i] != a[i] + b[i])
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+
+ free (a);
+ free (b);
+ free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+ /* Run only when posix_memalign's address is non-null.  */
+ if (posix_memalign)
+ run_test ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,155 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); /* Weak, so main's null test can detect a missing symbol.  */
+extern void free (void *);
+
+double __attribute__((noinline)) /* Stores c[i] + 1 into a[i]; returns the sum of d[i] where b[i] < 0.  */
+test_citer (int * __restrict__ a,
+ long long * __restrict__ b,
+ float * __restrict__ c,
+ double * __restrict__ d)
+{
+ double res = 0;
+ int i;
+ /* Assert ALIGN-byte alignment of all four arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (long long *)__builtin_assume_aligned (b, ALIGN);
+ c = (float *)__builtin_assume_aligned (c, ALIGN);
+ d = (double *)__builtin_assume_aligned (d, ALIGN);
+ /* Loop under test: mixed element types, compile-time bound SIZE.  */
+ for (i = 0; i < SIZE; i++)
+ {
+ a[i] = c[i] + 1; /* Float sum is converted to int on the store.  */
+ if (b[i] < 0)
+ res += d[i];
+ }
+
+ return res;
+}
+
+double __attribute__((noinline)) /* Stores c[i] + 1 into a[i]; returns the sum of d[i] where b[i] < 0.  */
+test_viter (int * __restrict__ a,
+ long long * __restrict__ b,
+ float * __restrict__ c,
+ double * __restrict__ d,
+ int size)
+{
+ double res = 0;
+ int i;
+ /* Assert ALIGN-byte alignment of all four arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (long long *)__builtin_assume_aligned (b, ALIGN);
+ c = (float *)__builtin_assume_aligned (c, ALIGN);
+ d = (double *)__builtin_assume_aligned (d, ALIGN);
+ /* Loop under test: mixed element types, bound is the `size' argument.  */
+ for (i = 0; i < size; i++)
+ {
+ a[i] = c[i] + 1; /* Float sum is converted to int on the store.  */
+ if (b[i] < 0)
+ res += d[i];
+ }
+
+ return res;
+}
+
+void __attribute__((noinline)) /* Fills the four arrays by index parity; plants guards at [size].  */
+init_data (int * __restrict__ a,
+ long long * __restrict__ b,
+ float * __restrict__ c,
+ double * __restrict__ d,
+ int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ if (i % 2) /* Odd index: b[i] is positive, d[i] is 1.  */
+ {
+ a[i] = 0;
+ b[i] = i;
+ c[i] = 2.5;
+ d[i] = 1;
+ }
+ else /* Even index: b[i] is zero or negative, d[i] is -1.  */
+ {
+ a[i] = 0;
+ b[i] = -i;
+ c[i] = 2.5;
+ d[i] = -1;
+ }
+ asm volatile("": : :"memory"); /* Memory clobber; presumably keeps this loop unvectorized.  */
+ }
+ a[size] = (int)size; /* Guard elements one past the data, one per array.  */
+ b[size] = (long long)size;
+ c[size] = (float)size;
+ d[size] = (double)size;
+}
+
+void __attribute__((noinline)) /* Allocates the arrays, runs both kernels, checks results.  */
+run_test ()
+{
+ int *a;
+ long long *b;
+ float *c;
+ double *d;
+ double res;
+ int i;
+ /* SIZE + 1 elements each: slot [SIZE] receives init_data's guard value.  */
+ if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+ return;
+ if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (float)) != 0)
+ return;
+ if (posix_memalign ((void **)&d, ALIGN, (SIZE + 1) * sizeof (double)) != 0)
+ return;
+ /* Constant-bound kernel: res + SIZE / 2 must be ~0, every a[i] must be 3.  */
+ init_data (a, b, c, d, SIZE);
+ res = test_citer (a, b, c, d);
+ res += SIZE / 2;
+ if (res > 0.01 || res < -0.01)
+ __builtin_abort ();
+ for (i = 0; i < SIZE; i++)
+ if (a[i] != 3)
+ __builtin_abort ();
+ if (a[SIZE] != (int)SIZE
+ || b[SIZE] != (long long)SIZE
+ || c[SIZE] != (float)SIZE
+ || d[SIZE] != (double)SIZE)
+ __builtin_abort ();
+ /* Same checks for the kernel that takes its bound as an argument.  */
+ init_data (a, b, c, d, SIZE);
+ res = test_viter (a, b, c, d, SIZE);
+ res += SIZE / 2;
+ if (res > 0.01 || res < -0.01)
+ __builtin_abort ();
+ for (i = 0; i < SIZE; i++)
+ if (a[i] != 3)
+ __builtin_abort ();
+ if (a[SIZE] != (int)SIZE
+ || b[SIZE] != (long long)SIZE
+ || c[SIZE] != (float)SIZE
+ || d[SIZE] != (double)SIZE)
+ __builtin_abort ();
+ free (a);
+ free (b);
+ free (c);
+ free (d); /* d is allocated above as well and must be released too.  */
+}
+
+int
+main (int argc, const char **argv)
+{
+ /* Run only when posix_memalign's address is non-null.  */
+ if (posix_memalign)
+ run_test ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */
+
+#define SIZE 33
+#define ALIGN 64
+
+void /* Kernel: c[i] = a[i] + b[i], SIZE iterations; this file is compile-only.  */
+test (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c)
+{
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: short compile-time trip count SIZE (33 here).  */
+ for (i = 0; i < SIZE; i++)
+ c[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "LOOP EPILOGUE COMBINED \\(VS=64\\)" "vect" } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-1.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-2.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-3.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-4.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-5.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-6.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-7.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */
+
+#define SIZE 31
+#define ALIGN 64
+
+void /* Kernel: c[i] = a[i] + b[i], SIZE iterations; this file is compile-only.  */
+test (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c)
+{
+ int i;
+ /* Assert ALIGN-byte alignment of all three arrays to the compiler.  */
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+ /* Loop under test: short compile-time trip count SIZE (31 here).  */
+ for (i = 0; i < SIZE; i++)
+ c[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */
+
+#include "vect-tail-combine-9.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=64\\)" "vect" { target { i?86-*-* x86_64-*-* } } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-1.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-2.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-3.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-4.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-5.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-6.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-7.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
@@ -1596,6 +1596,36 @@ proc check_avx_hw_available { } {
}]
}
+# Return 1 if the target supports executing AVX2 instructions, 0
+# otherwise. Cache the result.
+
+proc check_avx2_hw_available { } {
+ return [check_cached_effective_target avx2_hw_available {
+ # If this is not the right target then we can skip the test.
+ if { !([istarget x86_64-*-*] || [istarget i?86-*-*]) } {
+ expr 0
+ } else {
+ check_runtime_nocache avx2_hw_available {
+ #include "cpuid.h"
+ int main ()
+ {
+ unsigned int eax, ebx, ecx, edx;
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ || ((ecx & bit_OSXSAVE) != bit_OSXSAVE))
+ return 1;
+ /* Only cpuid.h is included, so pass a literal null pointer, not NULL.  */
+ if (__get_cpuid_max (0, 0) < 7)
+ return 1;
+ /* Query leaf 7, subleaf 0; the AVX2 bit is tested in EBX below.  */
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+ /* Exit status is 0 only when the AVX2 bit is set.  */
+ return (ebx & bit_AVX2) != bit_AVX2;
+ }
+ } ""
+ }
+ }]
+}
+
# Return 1 if the target supports running SSE executables, 0 otherwise.
proc check_effective_target_sse_runtime { } {
@@ -1629,6 +1659,17 @@ proc check_effective_target_avx_runtime { } {
return 0
}
+# Return 1 when AVX2 executables can be run on the target, 0 otherwise.
+
+proc check_effective_target_avx2_runtime { } {
+ if { ![check_effective_target_avx2]
+ || ![check_avx2_hw_available]
+ || ![check_avx_os_support_available] } {
+ return 0
+ }
+ return 1
+}
+
# Return 1 if we are compiling for 64-bit PowerPC but we do not use direct
# move instructions for moves from GPR to FPR.