@@ -0,0 +1,76 @@
+/* { dg-do run } */
+
+/* Like imperfect1.c, but also includes loop transforms. */
+
+static int f1count[3], f2count[3];
+
+int
+f1 (int depth, int iter)
+{
+ f1count[depth]++;
+ return iter;
+}
+
+int
+f2 (int depth, int iter)
+{
+ f2count[depth]++;
+ return iter;
+}
+
+void
+s1 (int a1, int a2, int a3)
+{
+ int i, j, k;
+
+ #pragma omp for collapse(2) private (j, k)
+ for (i = 0; i < a1; i++)
+ {
+ f1 (0, i);
+ for (j = 0; j < a2; j++)
+ {
+ f1 (1, j);
+ #pragma omp unroll partial
+ for (k = 0; k < a3; k++)
+ {
+ f1 (2, k);
+ f2 (2, k);
+ }
+ f2 (1, j);
+ }
+ f2 (0, i);
+ }
+}
+
+int
+main ()
+{
+ f1count[0] = 0;
+ f1count[1] = 0;
+ f1count[2] = 0;
+ f2count[0] = 0;
+ f2count[1] = 0;
+ f2count[2] = 0;
+
+ s1 (3, 4, 5);
+
+ /* All intervening code at the same depth must be executed the same
+ number of times. */
+ if (f1count[0] != f2count[0]) __builtin_abort ();
+ if (f1count[1] != f2count[1]) __builtin_abort ();
+ if (f1count[2] != f2count[2]) __builtin_abort ();
+
+ /* Intervening code must be executed at least as many times as the loop
+ that encloses it. */
+ if (f1count[0] < 3) __builtin_abort ();
+ if (f1count[1] < 3 * 4) __builtin_abort ();
+
+ /* Intervening code must not be executed more times than the number
+ of logical iterations. */
+ if (f1count[0] > 3 * 4 * 5) __builtin_abort ();
+ if (f1count[1] > 3 * 4 * 5) __builtin_abort ();
+
+ /* Check that the innermost loop body is executed exactly the number
+ of logical iterations expected. */
+ if (f1count[2] != 3 * 4 * 5) __builtin_abort ();
+}
@@ -0,0 +1,76 @@
+/* { dg-do run } */
+
+/* Like imperfect1.c, but also includes loop transforms. */
+
+static int f1count[3], f2count[3];
+
+int
+f1 (int depth, int iter)
+{
+ f1count[depth]++;
+ return iter;
+}
+
+int
+f2 (int depth, int iter)
+{
+ f2count[depth]++;
+ return iter;
+}
+
+void
+s1 (int a1, int a2, int a3)
+{
+ int i, j, k;
+
+ #pragma omp for collapse(2) private (j, k)
+ for (i = 0; i < a1; i++)
+ {
+ f1 (0, i);
+ for (j = 0; j < a2; j++)
+ {
+ f1 (1, j);
+ #pragma omp tile sizes(5)
+ for (k = 0; k < a3; k++)
+ {
+ f1 (2, k);
+ f2 (2, k);
+ }
+ f2 (1, j);
+ }
+ f2 (0, i);
+ }
+}
+
+int
+main ()
+{
+ f1count[0] = 0;
+ f1count[1] = 0;
+ f1count[2] = 0;
+ f2count[0] = 0;
+ f2count[1] = 0;
+ f2count[2] = 0;
+
+ s1 (3, 4, 5);
+
+ /* All intervening code at the same depth must be executed the same
+ number of times. */
+ if (f1count[0] != f2count[0]) __builtin_abort ();
+ if (f1count[1] != f2count[1]) __builtin_abort ();
+ if (f1count[2] != f2count[2]) __builtin_abort ();
+
+ /* Intervening code must be executed at least as many times as the loop
+ that encloses it. */
+ if (f1count[0] < 3) __builtin_abort ();
+ if (f1count[1] < 3 * 4) __builtin_abort ();
+
+ /* Intervening code must not be executed more times than the number
+ of logical iterations. */
+ if (f1count[0] > 3 * 4 * 5) __builtin_abort ();
+ if (f1count[1] > 3 * 4 * 5) __builtin_abort ();
+
+ /* Check that the innermost loop body is executed exactly the number
+ of logical iterations expected. */
+ if (f1count[2] != 3 * 4 * 5) __builtin_abort ();
+}
@@ -0,0 +1,82 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifndef FUN_NAME_SUFFIX
+#define FUN_NAME_SUFFIX
+#endif
+
+#ifdef MULT
+#undef MULT
+#endif
+#define MULT CAT(mult, FUN_NAME_SUFFIX)
+
+#ifdef MAIN
+#undef MAIN
+#endif
+#define MAIN CAT(main, FUN_NAME_SUFFIX)
+
+void
+MULT (float *matrix1, float *matrix2, float *result,
+ unsigned dim0, unsigned dim1)
+{
+ memset (result, 0, sizeof (float) * dim0 * dim1);
+ DIRECTIVE
+ TRANSFORMATION1
+ for (unsigned i = 0; i < dim0; i++)
+ TRANSFORMATION2
+ for (unsigned j = 0; j < dim1; j++)
+ TRANSFORMATION3
+ for (unsigned k = 0; k < dim1; k++)
+ result[i * dim1 + j] += matrix1[i * dim1 + k] * matrix2[k * dim0 + j];
+}
+
+int
+MAIN ()
+{
+ unsigned dim0 = 20;
+ unsigned dim1 = 20;
+
+ float *result = (float *) malloc (sizeof (float) * dim0 * dim1);
+ float *matrix1 = (float *) malloc (sizeof (float) * dim0 * dim1);
+ float *matrix2 = (float *) malloc (sizeof (float) * dim0 * dim1);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ matrix1[i * dim1 + j] = j;
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ if (i == j)
+ matrix2[i * dim1 + j] = 1;
+ else
+ matrix2[i * dim1 + j] = 0;
+
+ MULT (matrix1, matrix2, result, dim0, dim1);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ {
+ if (matrix1[i * dim1 + j] != result[i * dim1 + j])
+ {
+ print_matrix (matrix1, dim0, dim1);
+ print_matrix (matrix2, dim0, dim1);
+ print_matrix (result, dim0, dim1);
+ fprintf (stderr, "%s: ERROR at %d, %d\n", __FUNCTION__, i, j);
+ abort ();
+ }
+ }
+
+ free (matrix2);
+ free (matrix1);
+ free (result);
+
+ return 0;
+}
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
@@ -0,0 +1,85 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifndef FUN_NAME_SUFFIX
+#define FUN_NAME_SUFFIX
+#endif
+
+#ifdef MULT
+#undef MULT
+#endif
+#define MULT CAT(mult, FUN_NAME_SUFFIX)
+
+#ifdef MAIN
+#undef MAIN
+#endif
+#define MAIN CAT(main, FUN_NAME_SUFFIX)
+
+void
+MULT (float *matrix1, float *matrix2, float *result)
+{
+ const unsigned dim0 = 20;
+ const unsigned dim1 = 20;
+
+ memset (result, 0, sizeof (float) * dim0 * dim1);
+ DIRECTIVE
+ TRANSFORMATION1
+ for (unsigned i = 0; i < dim0; i++)
+ TRANSFORMATION2
+ for (unsigned j = 0; j < dim1; j++)
+ TRANSFORMATION3
+ for (unsigned k = 0; k < dim1; k++)
+ result[i * dim1 + j] += matrix1[i * dim1 + k] * matrix2[k * dim0 + j];
+}
+
+int
+MAIN ()
+{
+ const unsigned dim0 = 20;
+ const unsigned dim1 = 20;
+
+ float *result = (float *) malloc (sizeof (float) * dim0 * dim1);
+ float *matrix1 = (float *) malloc (sizeof (float) * dim0 * dim1);
+ float *matrix2 = (float *) malloc (sizeof (float) * dim0 * dim1);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ matrix1[i * dim1 + j] = j;
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ if (i == j)
+ matrix2[i * dim1 + j] = 1;
+ else
+ matrix2[i * dim1 + j] = 0;
+
+ MULT (matrix1, matrix2, result);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ {
+ if (matrix1[i * dim1 + j] != result[i * dim1 + j])
+ {
+ printf ("%s: error at %d, %d\n", __FUNCTION__, i, j);
+ print_matrix (matrix1, dim0, dim1);
+ print_matrix (matrix2, dim0, dim1);
+ print_matrix (result, dim0, dim1);
+ printf ("\n");
+ abort ();
+ }
+ }
+
+ free (matrix2);
+ free (matrix1);
+ free (result);
+
+ return 0;
+}
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#define CAT(x,y) XCAT(x,y)
+#define XCAT(x,y) x ## y
+#define DO_PRAGMA(x) XDO_PRAGMA(x)
+#define XDO_PRAGMA(x) _Pragma (#x)
+
+void
+print_matrix (float *matrix, unsigned dim0, unsigned dim1)
+{
+ for (unsigned i = 0; i < dim0; i++)
+ {
+ for (unsigned j = 0; j < dim1; j++)
+ fprintf (stderr, "%f ", matrix[i * dim1 + j]);
+ fprintf (stderr, "\n");
+ }
+ fprintf (stderr, "\n");
+}
@@ -0,0 +1,11 @@
+/* { dg-additional-options "-fdump-tree-original -Wall -Wno-unknown-pragmas" } */
+
+#undef COMMON_DIRECTIVE
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "unroll partial" 12 "original" } } */
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O2 -fdump-tree-original -Wall -Wno-unknown-pragmas" } */
+
+#undef COMMON_DIRECTIVE
+#define COMMON_TOP_TRANSFORM omp unroll full
+#define COLLAPSE_1
+#define COLLAPSE_2
+#define COLLAPSE_3
+#define IMPLEMENTATION_FILE "matrix-constant-iter.h"
+
+#include "matrix-transform-variants-1.h"
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "unroll full" 13 "original" } } */
@@ -0,0 +1,8 @@
+/* { dg-additional-options "-Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE "omp teams distribute parallel for"
+#define COLLAPSE_1 "collapse(1)"
+#define COLLAPSE_2 "collapse(2)"
+#define COLLAPSE_3 "collapse(3)"
+
+#include "matrix-transform-variants-1.h"
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-fdump-tree-original -Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE omp for
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
+
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "omp for" 13 "original" } } */
+/* { dg-final { scan-tree-dump-times "collapse" 12 "original" } } */
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-fdump-tree-original -Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE omp parallel for
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
+
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "omp parallel" 13 "original" } } */
+/* { dg-final { scan-tree-dump-times "collapse" 9 "original" } } */
@@ -0,0 +1,8 @@
+/* { dg-additional-options "-Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE omp parallel masked taskloop
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
@@ -0,0 +1,8 @@
+/* { dg-additional-options "-Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE omp parallel masked taskloop simd
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
@@ -0,0 +1,15 @@
+/* This test appears to have too much parallelism to run without a GPU. */
+/* { dg-do run { target { offload_device } } } */
+/* { dg-additional-options "-fdump-tree-original -Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE omp target parallel for map(tofrom:result[0:dim0 * dim1]) map(to:matrix1[0:dim0 * dim1], matrix2[0:dim0 * dim1])
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "omp target" 13 "original" } } */
+/* { dg-final { scan-tree-dump-times "collapse" 9 "original" } } */
+/* { dg-final { scan-tree-dump-times "unroll partial" 12 "original" } } */
@@ -0,0 +1,10 @@
+/* This test appears to have too much parallelism to run without a GPU. */
+/* { dg-do run { target { offload_device } } } */
+/* { dg-additional-options "-Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE omp target teams distribute parallel for map(tofrom:result[:dim0 * dim1]) map(to:matrix1[0:dim0 * dim1], matrix2[0:dim0 * dim1])
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
@@ -0,0 +1,8 @@
+/* { dg-additional-options "-Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE omp taskloop
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
@@ -0,0 +1,8 @@
+/* { dg-additional-options "-Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE omp teams distribute parallel for
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
@@ -0,0 +1,8 @@
+/* { dg-additional-options "-Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE omp simd
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
@@ -0,0 +1,171 @@
+#include "matrix-helper.h"
+
+#ifndef COMMON_TOP_TRANSFORM
+#define COMMON_TOP_TRANSFORM
+#endif
+
+#ifndef IMPLEMENTATION_FILE
+#define IMPLEMENTATION_FILE "matrix-1.h"
+#endif
+
+#define FUN_NAME_SUFFIX 1
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp unroll partial(2)") _Pragma("omp tile sizes(10)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 2
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_3)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8,16,4)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 3
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8, 8)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 4
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8, 8)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 5
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8, 8, 8)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 6
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(10)") _Pragma("omp unroll partial(2)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 7
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(7, 11)")
+#define TRANSFORMATION2 _Pragma("omp unroll partial(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 8
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(7, 11)")
+#define TRANSFORMATION2 _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 9
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(7, 11)")
+#define TRANSFORMATION2 _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 10
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp unroll partial(5)") _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 11
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM)
+#define TRANSFORMATION2 _Pragma("omp unroll partial(5)") _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 12
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_3)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM)
+#define TRANSFORMATION2
+#define TRANSFORMATION3 _Pragma("omp unroll partial(5)") _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#include IMPLEMENTATION_FILE
+
+#define FUN_NAME_SUFFIX 13
+#ifdef COMMON_DIRECTIVE
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_3)
+#else
+#define DIRECTIVE
+#endif
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM)
+#define TRANSFORMATION2 _Pragma("omp tile sizes(7,8)")
+#define TRANSFORMATION3 _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#include IMPLEMENTATION_FILE
+
+int
+main ()
+{
+ main1 ();
+ main2 ();
+ main3 ();
+ main4 ();
+ main5 ();
+ main6 ();
+ main7 ();
+ main8 ();
+ main9 ();
+ main10 ();
+ main11 ();
+ main12 ();
+ main13 ();
+ return 0;
+}
@@ -0,0 +1,79 @@
+/* { dg-do run } */
+
+/* Like imperfect-transform.c, but enables offloading. */
+
+static int f1count[3], f2count[3];
+#pragma omp declare target enter (f1count, f2count)
+
+int
+f1 (int depth, int iter)
+{
+ #pragma omp atomic
+ f1count[depth]++;
+ return iter;
+}
+
+int
+f2 (int depth, int iter)
+{
+ #pragma omp atomic
+ f2count[depth]++;
+ return iter;
+}
+
+void
+s1 (int a1, int a2, int a3)
+{
+ int i, j, k;
+
+ #pragma omp target parallel for collapse(2) map(always, tofrom:f1count, f2count) private (j, k)
+ for (i = 0; i < a1; i++)
+ {
+ f1 (0, i);
+ for (j = 0; j < a2; j++)
+ {
+ f1 (1, j);
+ #pragma omp unroll partial
+ for (k = 0; k < a3; k++)
+ {
+ f1 (2, k);
+ f2 (2, k);
+ }
+ f2 (1, j);
+ }
+ f2 (0, i);
+ }
+}
+
+int
+main ()
+{
+ f1count[0] = 0;
+ f1count[1] = 0;
+ f1count[2] = 0;
+ f2count[0] = 0;
+ f2count[1] = 0;
+ f2count[2] = 0;
+
+ s1 (3, 4, 5);
+
+ /* All intervening code at the same depth must be executed the same
+ number of times. */
+ if (f1count[0] != f2count[0]) __builtin_abort ();
+ if (f1count[1] != f2count[1]) __builtin_abort ();
+ if (f1count[2] != f2count[2]) __builtin_abort ();
+
+ /* Intervening code must be executed at least as many times as the loop
+ that encloses it. */
+ if (f1count[0] < 3) __builtin_abort ();
+ if (f1count[1] < 3 * 4) __builtin_abort ();
+
+ /* Intervening code must not be executed more times than the number
+ of logical iterations. */
+ if (f1count[0] > 3 * 4 * 5) __builtin_abort ();
+ if (f1count[1] > 3 * 4 * 5) __builtin_abort ();
+
+ /* Check that the innermost loop body is executed exactly the number
+ of logical iterations expected. */
+ if (f1count[2] != 3 * 4 * 5) __builtin_abort ();
+}
@@ -0,0 +1,79 @@
+/* { dg-do run } */
+
+/* Like imperfect-transform.c, but enables offloading. */
+
+static int f1count[3], f2count[3];
+#pragma omp declare target enter (f1count, f2count)
+
+int
+f1 (int depth, int iter)
+{
+ #pragma omp atomic
+ f1count[depth]++;
+ return iter;
+}
+
+int
+f2 (int depth, int iter)
+{
+ #pragma omp atomic
+ f2count[depth]++;
+ return iter;
+}
+
+void
+s1 (int a1, int a2, int a3)
+{
+ int i, j, k;
+
+ #pragma omp target parallel for collapse(2) map(always, tofrom:f1count, f2count) private (j, k)
+ for (i = 0; i < a1; i++)
+ {
+ f1 (0, i);
+ for (j = 0; j < a2; j++)
+ {
+ f1 (1, j);
+ #pragma omp tile sizes(5)
+ for (k = 0; k < a3; k++)
+ {
+ f1 (2, k);
+ f2 (2, k);
+ }
+ f2 (1, j);
+ }
+ f2 (0, i);
+ }
+}
+
+int
+main ()
+{
+ f1count[0] = 0;
+ f1count[1] = 0;
+ f1count[2] = 0;
+ f2count[0] = 0;
+ f2count[1] = 0;
+ f2count[2] = 0;
+
+ s1 (3, 4, 5);
+
+ /* All intervening code at the same depth must be executed the same
+ number of times. */
+ if (f1count[0] != f2count[0]) __builtin_abort ();
+ if (f1count[1] != f2count[1]) __builtin_abort ();
+ if (f1count[2] != f2count[2]) __builtin_abort ();
+
+ /* Intervening code must be executed at least as many times as the loop
+ that encloses it. */
+ if (f1count[0] < 3) __builtin_abort ();
+ if (f1count[1] < 3 * 4) __builtin_abort ();
+
+ /* Intervening code must not be executed more times than the number
+ of logical iterations. */
+ if (f1count[0] > 3 * 4 * 5) __builtin_abort ();
+ if (f1count[1] > 3 * 4 * 5) __builtin_abort ();
+
+ /* Check that the innermost loop body is executed exactly the number
+ of logical iterations expected. */
+ if (f1count[2] != 3 * 4 * 5) __builtin_abort ();
+}
@@ -0,0 +1,70 @@
+/* { dg-additional-options "-Wall -Wno-unknown-pragmas" } */
+
+int
+compute_sum1 (void)
+{
+ int sum = 0;
+ int i, j;
+
+ #pragma omp parallel for reduction(+:sum) lastprivate(j)
+ #pragma omp unroll partial
+ for (i = 3; i < 10; ++i)
+ for (j = -2; j < 7; ++j)
+ sum++;
+
+ if (j != 7)
+ __builtin_abort ();
+
+ return sum;
+}
+
+int
+compute_sum2 (void)
+{
+ int sum = 0;
+ int i, j;
+
+ #pragma omp parallel for reduction(+:sum) lastprivate(j)
+ #pragma omp unroll partial(5)
+ for (i = 3; i < 10; ++i)
+ for (j = -2; j < 7; ++j)
+ sum++;
+
+ if (j != 7)
+ __builtin_abort ();
+
+ return sum;
+}
+
+int
+compute_sum3 (void)
+{
+ int sum = 0;
+ int i, j;
+
+ #pragma omp parallel for reduction(+:sum) lastprivate(j)
+ #pragma omp unroll partial(1)
+ for (i = 3; i < 10; ++i)
+ for (j = -2; j < 7; ++j)
+ sum++;
+
+ if (j != 7)
+ __builtin_abort ();
+
+ return sum;
+}
+
+int
+main ()
+{
+ if (compute_sum1 () != 7 * 9)
+ __builtin_abort ();
+
+ if (compute_sum2 () != 7 * 9)
+ __builtin_abort ();
+
+ if (compute_sum3 () != 7 * 9)
+ __builtin_abort ();
+
+ return 0;
+}
@@ -0,0 +1,118 @@
+/* { dg-additional-options "-Wall -Wno-unknown-pragmas" } */
+
+void
+test1 (void)
+{
+ int sum = 0;
+
+ for (int i = -3; i != 1; ++i)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ __builtin_abort ();
+}
+
+void
+test2 (void)
+{
+ int sum = 0;
+
+ #pragma omp unroll partial
+ for (int i = -3; i != 1; ++i)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ __builtin_abort ();
+}
+
+void
+test3 (void)
+{
+ int sum = 0;
+
+ #pragma omp unroll partial
+ for (int i = -3; i != 1; ++i)
+ #pragma omp unroll partial
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ __builtin_abort ();
+}
+
+int sum;
+
+void
+test4 (void)
+{
+ #pragma omp for reduction(+:sum)
+ #pragma omp unroll partial(5)
+ for (int i = -3; i != 1; ++i)
+ #pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ __builtin_abort ();
+}
+
+void
+test5 (void)
+{
+ int sum = 0;
+
+ #pragma omp parallel for reduction(+:sum)
+ #pragma omp unroll partial(2)
+ for (int i = -3; i != 1; ++i)
+ #pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ __builtin_abort ();
+}
+
+void
+test6 (void)
+{
+ int sum = 0;
+ #pragma omp target parallel for reduction(+:sum)
+ #pragma omp unroll partial(7)
+ for (int i = -3; i != 1; ++i)
+ #pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ __builtin_abort ();
+}
+
+void
+test7 (void)
+{
+ int sum = 0;
+#pragma omp target teams distribute parallel for reduction(+:sum)
+#pragma omp unroll partial(7)
+ for (int i = -3; i != 1; ++i)
+#pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ test1 ();
+ test2 ();
+ test3 ();
+ test4 ();
+ test5 ();
+ test6 ();
+ test7 ();
+ return 0;
+}
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O0 -fdump-tree-original -Wall -Wno-unknown-pragmas" } */
+
+#define COMMON_DIRECTIVE
+#define COMMON_TOP_TRANSFORM omp unroll full
+#define COLLAPSE_1
+#define COLLAPSE_2
+#define COLLAPSE_3
+#define IMPLEMENTATION_FILE "../libgomp.c-c++-common/matrix-constant-iter.h"
+
+#include "../libgomp.c-c++-common/matrix-transform-variants-1.h"
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "unroll full" 13 "original" } } */
@@ -0,0 +1,57 @@
+// { dg-additional-options "-std=c++11 -O0" }
+
+#include <vector>
+
+constexpr unsigned
+fib (unsigned n)
+{
+ return n <= 2 ? 1 : fib (n-1) + fib (n-2);
+}
+
+int
+test1 ()
+{
+ std::vector<int> v;
+
+ for (unsigned i = 0; i <= 9; i++)
+ v.push_back (1);
+
+ int sum = 0;
+ for (int k = 0; k < 10; k++)
+ #pragma omp tile sizes(fib(4))
+ for (int i : v) {
+ for (int j = 8; j != -2; --j)
+ sum = sum + i;
+ }
+
+ return sum;
+}
+
+int
+test2 ()
+{
+ std::vector<int> v;
+
+ for (unsigned i = 0; i <= 10; i++)
+ v.push_back (i);
+
+ int sum = 0;
+ for (int k = 0; k < 10; k++)
+ #pragma omp parallel for collapse(2) reduction(+:sum)
+ #pragma omp tile sizes(fib(4), 1)
+ for (int i : v)
+ for (int j = 8; j > -2; --j)
+ sum = sum + i;
+
+ return sum;
+}
+
+int
+main ()
+{
+ if (test1 () != 1000)
+ __builtin_abort ();
+
+ if (test2 () != 5500)
+ __builtin_abort ();
+}
@@ -0,0 +1,26 @@
+// { dg-additional-options "-std=c++11 -O0" }
+
+#include <vector>
+
+int
+main ()
+{
+ std::vector<int> v;
+ std::vector<int> w;
+
+ for (unsigned i = 0; i <= 9; i++)
+ v.push_back (i);
+
+ int iter = 0;
+ #pragma omp for
+ #pragma omp tile sizes(5)
+ for (int i : v)
+ {
+ w.push_back (iter);
+ iter++;
+ }
+
+ for (int i = 0; i < w.size (); i++)
+ if (w[i] != i)
+ __builtin_abort ();
+}
@@ -0,0 +1,63 @@
+// { dg-additional-options "-std=c++11 -O0" }
+
+#include <vector>
+#include <stdio.h>
+
+constexpr unsigned
+fib (unsigned n)
+{
+ return n <= 2 ? 1 : fib (n-1) + fib (n-2);
+}
+
+int
+test1 ()
+{
+ std::vector<int> v;
+
+ for (unsigned i = 0; i <= 9; i++)
+ v.push_back (1);
+
+ int sum = 0;
+ for (int k = 0; k < 10; k++)
+ #pragma omp unroll partial(fib(3))
+ for (int i : v)
+ {
+ for (int j = 8; j != -2; --j)
+ sum = sum + i;
+ }
+
+ return sum;
+}
+
+int
+test2 ()
+{
+ std::vector<int> v;
+
+ for (unsigned i = 0; i <= 10; i++)
+ v.push_back (i);
+
+ int sum = 0;
+ #pragma omp parallel for reduction(+:sum)
+ for (int k = 0; k < 10; k++)
+ #pragma omp unroll
+ #pragma omp unroll partial(fib(4))
+ for (int i : v)
+ {
+ #pragma omp unroll full
+ for (int j = 8; j != -2; --j)
+ sum = sum + i;
+ }
+
+ return sum;
+}
+
+int
+main ()
+{
+ if (test1 () != 1000)
+ __builtin_abort ();
+
+ if (test2 () != 5500)
+ __builtin_abort ();
+}
@@ -0,0 +1,31 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++11" }
+
+#include <vector>
+#include <iostream>
+
+int
+main ()
+{
+ std::vector<std::vector<int>> v;
+ std::vector<int> w;
+
+ for (unsigned i = 0; i < 10; i++)
+ {
+ std::vector<int> u;
+ for (unsigned j = 0; j < 10; j++)
+ u.push_back (j);
+ v.push_back (u);
+ }
+
+ #pragma omp for
+ #pragma omp unroll partial(7)
+ for (auto u : v)
+ for (int x : u)
+ w.push_back (x);
+
+ std::size_t l = w.size ();
+ for (std::size_t i = 0; i < l; i++)
+ if (w[i] != i % 10)
+ __builtin_abort ();
+}
@@ -0,0 +1,81 @@
+template <int dim0, int dim1>
+int sum ()
+{
+ int sum = 0;
+ #pragma omp unroll full
+ #pragma omp tile sizes (dim0, dim1)
+ for (unsigned i = 0; i < 4; i++)
+ for (unsigned j = 0; j < 5; j++)
+ sum++;
+
+ return sum;
+}
+
+int main ()
+{
+ if (sum <1,1> () != 20)
+ __builtin_abort ();
+ if (sum <1,2> () != 20)
+ __builtin_abort ();
+ if (sum <1,3> () != 20)
+ __builtin_abort ();
+ if (sum <1,4> () != 20)
+ __builtin_abort ();
+ if (sum <1,5> () != 20)
+ __builtin_abort ();
+
+ if (sum <2,1> () != 20)
+ __builtin_abort ();
+ if (sum <2,2> () != 20)
+ __builtin_abort ();
+ if (sum <2,3> () != 20)
+ __builtin_abort ();
+ if (sum <2,4> () != 20)
+ __builtin_abort ();
+ if (sum <2,5> () != 20)
+ __builtin_abort ();
+
+ if (sum <3,1> () != 20)
+ __builtin_abort ();
+ if (sum <3,2> () != 20)
+ __builtin_abort ();
+ if (sum <3,3> () != 20)
+ __builtin_abort ();
+ if (sum <3,4> () != 20)
+ __builtin_abort ();
+ if (sum <3,5> () != 20)
+ __builtin_abort ();
+
+ if (sum <4,1> () != 20)
+ __builtin_abort ();
+ if (sum <4,2> () != 20)
+ __builtin_abort ();
+ if (sum <4,3> () != 20)
+ __builtin_abort ();
+ if (sum <4,4> () != 20)
+ __builtin_abort ();
+ if (sum <4,5> () != 20)
+ __builtin_abort ();
+
+ if (sum <5,1> () != 20)
+ __builtin_abort ();
+ if (sum <5,2> () != 20)
+ __builtin_abort ();
+ if (sum <5,3> () != 20)
+ __builtin_abort ();
+ if (sum <5,4> () != 20)
+ __builtin_abort ();
+ if (sum <5,5> () != 20)
+ __builtin_abort ();
+
+ if (sum <6,1> () != 20)
+ __builtin_abort ();
+ if (sum <6,2> () != 20)
+ __builtin_abort ();
+ if (sum <6,3> () != 20)
+ __builtin_abort ();
+ if (sum <6,4> () != 20)
+ __builtin_abort ();
+ if (sum <6,5> () != 20)
+ __builtin_abort ();
+}
@@ -0,0 +1,70 @@
+! { dg-do run }
+
+! Like imperfect1.f90, but also includes loop transforms.
+
+program foo
+ integer, save :: f1count(3), f2count(3)
+
+ f1count(1) = 0
+ f1count(2) = 0
+ f1count(3) = 0
+ f2count(1) = 0
+ f2count(2) = 0
+ f2count(3) = 0
+
+ call s1 (3, 4, 5)
+
+ ! All intervening code at the same depth must be executed the same
+ ! number of times.
+ if (f1count(1) /= f2count(1)) error stop 101
+ if (f1count(2) /= f2count(2)) error stop 102
+ if (f1count(3) /= f2count(3)) error stop 103
+
+ ! Intervening code must be executed at least as many times as the loop
+ ! that encloses it.
+ if (f1count(1) < 3) error stop 111
+ if (f1count(2) < 3 * 4) error stop 112
+
+ ! Intervening code must not be executed more times than the number
+ ! of logical iterations.
+ if (f1count(1) > 3 * 4 * 5) error stop 121
+ if (f1count(2) > 3 * 4 * 5) error stop 122
+
+ ! Check that the innermost loop body is executed exactly the number
+ ! of logical iterations expected.
+ if (f1count(3) /= 3 * 4 * 5) error stop 131
+
+contains
+
+subroutine f1 (depth, iter)
+ integer :: depth, iter
+ f1count(depth) = f1count(depth) + 1
+end subroutine
+
+subroutine f2 (depth, iter)
+ integer :: depth, iter
+ f2count(depth) = f2count(depth) + 1
+end subroutine
+
+subroutine s1 (a1, a2, a3)
+ integer :: a1, a2, a3
+ integer :: i, j, k
+
+ !$omp do collapse(2) private (j, k)
+ do i = 1, a1
+ call f1 (1, i)
+ do j = 1, a2
+ call f1 (2, j)
+ !$omp unroll partial
+ do k = 1, a3
+ call f1 (3, k)
+ call f2 (3, k)
+ end do
+ call f2 (2, j)
+ end do
+ call f2 (1, i)
+ end do
+
+end subroutine
+
+end program
@@ -0,0 +1,70 @@
+! { dg-do run }
+
+! Like imperfect1.f90, but also includes loop transforms.
+
+program foo
+ integer, save :: f1count(3), f2count(3)
+
+ f1count(1) = 0
+ f1count(2) = 0
+ f1count(3) = 0
+ f2count(1) = 0
+ f2count(2) = 0
+ f2count(3) = 0
+
+ call s1 (3, 4, 5)
+
+ ! All intervening code at the same depth must be executed the same
+ ! number of times.
+ if (f1count(1) /= f2count(1)) error stop 101
+ if (f1count(2) /= f2count(2)) error stop 102
+ if (f1count(3) /= f2count(3)) error stop 103
+
+ ! Intervening code must be executed at least as many times as the loop
+ ! that encloses it.
+ if (f1count(1) < 3) error stop 111
+ if (f1count(2) < 3 * 4) error stop 112
+
+ ! Intervening code must not be executed more times than the number
+ ! of logical iterations.
+ if (f1count(1) > 3 * 4 * 5) error stop 121
+ if (f1count(2) > 3 * 4 * 5) error stop 122
+
+ ! Check that the innermost loop body is executed exactly the number
+ ! of logical iterations expected.
+ if (f1count(3) /= 3 * 4 * 5) error stop 131
+
+contains
+
+subroutine f1 (depth, iter)
+ integer :: depth, iter
+ f1count(depth) = f1count(depth) + 1
+end subroutine
+
+subroutine f2 (depth, iter)
+ integer :: depth, iter
+ f2count(depth) = f2count(depth) + 1
+end subroutine
+
+subroutine s1 (a1, a2, a3)
+ integer :: a1, a2, a3
+ integer :: i, j, k
+
+ !$omp do collapse(2) private (j, k)
+ do i = 1, a1
+ call f1 (1, i)
+ do j = 1, a2
+ call f1 (2, j)
+ !$omp tile sizes(5)
+ do k = 1, a3
+ call f1 (3, k)
+ call f2 (3, k)
+ end do
+ call f2 (2, j)
+ end do
+ call f2 (1, i)
+ end do
+
+end subroutine
+
+end program
@@ -0,0 +1,77 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ !$omp target parallel do collapse(2) &
+ !$omp & private(inner, i, j, k) map(to:a,b) map(from:c)
+ !$omp tile sizes (8, 1)
+ do i = 1,m
+ !$omp tile sizes (8)
+ do j = 1,n
+ !$omp unroll partial(10)
+ do k = 1, n
+ if (k == 1) then
+ inner = 0
+ endif
+ inner = inner + a(k, i) * b(j, k)
+ if (k == n) then
+ c(j, i) = inner
+ endif
+ end do
+ end do
+ end do
+ end function mult
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ c = mult (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) stop 1
+ end do
+ end do
+
+end program main
@@ -0,0 +1,23 @@
+! { dg-do run }
+
+program foo
+ integer :: count
+ call s1
+contains
+
+ subroutine s1 ()
+ integer :: i, count
+
+ count = 0
+
+ !$omp target parallel do map(tofrom:count) reduction(+:count) private(i)
+ !$omp unroll partial
+ do i = 1, 100
+ count = count + 1
+ end do
+
+ if (count .ne. 100) stop 1
+
+ end subroutine
+
+end program
@@ -0,0 +1,74 @@
+! { dg-do run }
+
+! Like imperfect-transform.f90, but enables offloading.
+
+program foo
+ integer, save :: f1count(3), f2count(3)
+ !$omp declare target enter (f1count, f2count)
+
+ f1count(1) = 0
+ f1count(2) = 0
+ f1count(3) = 0
+ f2count(1) = 0
+ f2count(2) = 0
+ f2count(3) = 0
+
+ call s1 (3, 4, 5)
+
+ ! All intervening code at the same depth must be executed the same
+ ! number of times.
+ if (f1count(1) /= f2count(1)) error stop 101
+ if (f1count(2) /= f2count(2)) error stop 102
+ if (f1count(3) /= f2count(3)) error stop 103
+
+ ! Intervening code must be executed at least as many times as the loop
+ ! that encloses it.
+ if (f1count(1) < 3) error stop 111
+ if (f1count(2) < 3 * 4) error stop 112
+
+ ! Intervening code must not be executed more times than the number
+ ! of logical iterations.
+ if (f1count(1) > 3 * 4 * 5) error stop 121
+ if (f1count(2) > 3 * 4 * 5) error stop 122
+
+ ! Check that the innermost loop body is executed exactly the number
+ ! of logical iterations expected.
+ if (f1count(3) /= 3 * 4 * 5) error stop 131
+
+contains
+
+subroutine f1 (depth, iter)
+ integer :: depth, iter
+ !$omp atomic
+ f1count(depth) = f1count(depth) + 1
+end subroutine
+
+subroutine f2 (depth, iter)
+ integer :: depth, iter
+ !$omp atomic
+ f2count(depth) = f2count(depth) + 1
+end subroutine
+
+subroutine s1 (a1, a2, a3)
+ integer :: a1, a2, a3
+ integer :: i, j, k
+
+ !$omp target parallel do collapse(2) map(always, tofrom:f1count, f2count) &
+ !$omp & private (j, k)
+ do i = 1, a1
+ call f1 (1, i)
+ do j = 1, a2
+ call f1 (2, j)
+ !$omp unroll partial
+ do k = 1, a3
+ call f1 (3, k)
+ call f2 (3, k)
+ end do
+ call f2 (2, j)
+ end do
+ call f2 (1, i)
+ end do
+
+end subroutine
+
+end program
@@ -0,0 +1,74 @@
+! { dg-do run }
+
+! Like imperfect-transform.f90, but enables offloading.
+
+program foo
+ integer, save :: f1count(3), f2count(3)
+ !$omp declare target enter (f1count, f2count)
+
+ f1count(1) = 0
+ f1count(2) = 0
+ f1count(3) = 0
+ f2count(1) = 0
+ f2count(2) = 0
+ f2count(3) = 0
+
+ call s1 (3, 4, 5)
+
+ ! All intervening code at the same depth must be executed the same
+ ! number of times.
+ if (f1count(1) /= f2count(1)) error stop 101
+ if (f1count(2) /= f2count(2)) error stop 102
+ if (f1count(3) /= f2count(3)) error stop 103
+
+ ! Intervening code must be executed at least as many times as the loop
+ ! that encloses it.
+ if (f1count(1) < 3) error stop 111
+ if (f1count(2) < 3 * 4) error stop 112
+
+ ! Intervening code must not be executed more times than the number
+ ! of logical iterations.
+ if (f1count(1) > 3 * 4 * 5) error stop 121
+ if (f1count(2) > 3 * 4 * 5) error stop 122
+
+ ! Check that the innermost loop body is executed exactly the number
+ ! of logical iterations expected.
+ if (f1count(3) /= 3 * 4 * 5) error stop 131
+
+contains
+
+subroutine f1 (depth, iter)
+ integer :: depth, iter
+ !$omp atomic
+ f1count(depth) = f1count(depth) + 1
+end subroutine
+
+subroutine f2 (depth, iter)
+ integer :: depth, iter
+ !$omp atomic
+ f2count(depth) = f2count(depth) + 1
+end subroutine
+
+subroutine s1 (a1, a2, a3)
+ integer :: a1, a2, a3
+ integer :: i, j, k
+
+ !$omp target parallel do collapse(2) map(always, tofrom:f1count, f2count) &
+ !$omp & private(j, k)
+ do i = 1, a1
+ call f1 (1, i)
+ do j = 1, a2
+ call f1 (2, j)
+ !$omp tile sizes(5)
+ do k = 1, a3
+ call f1 (3, k)
+ call f2 (3, k)
+ end do
+ call f2 (2, j)
+ end do
+ call f2 (1, i)
+ end do
+
+end subroutine
+
+end program
@@ -0,0 +1,70 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ !$omp parallel do collapse(2) private(inner, i, j, k)
+ !$omp tile sizes (8, 1)
+ do i = 1,m
+ do j = 1,n
+ inner = 0
+ do k = 1, n
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner
+ end do
+ end do
+ end function mult
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ c = mult (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) stop 1
+ end do
+ end do
+
+end program main
@@ -0,0 +1,107 @@
+! { dg-do run }
+
+module test_functions
+ contains
+ integer function compute_sum1() result(sum)
+ implicit none
+ integer :: i,j
+
+ sum = 0
+ !$omp parallel do reduction(+:sum) private(j)
+ do i = 1,10,3
+ !$omp tile sizes(2)
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+
+ integer function compute_sum2() result(sum)
+ implicit none
+ integer :: i,j
+
+ sum = 0
+ !$omp parallel do reduction(+:sum) private(j)
+ do i = 1,10,3
+ !$omp tile sizes(16)
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+
+ integer function compute_sum3() result(sum)
+ implicit none
+ integer :: i,j
+
+ sum = 0
+ !$omp parallel do reduction(+:sum) private(j)
+ do i = 1,10,3
+ !$omp tile sizes(100)
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+
+ integer function compute_sum4() result(sum)
+ implicit none
+ integer :: i,j
+
+ sum = 0
+ !$omp parallel do reduction(+:sum) private(i, j)
+ !$omp tile sizes(6,10)
+ do i = 1,10,3
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+
+ integer function compute_sum5() result(sum)
+ implicit none
+ integer :: i,j
+
+ sum = 0
+ !$omp parallel do collapse(2) reduction(+:sum) private(i, j)
+ !$omp tile sizes(6,10)
+ do i = 1,10,3
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ !$omp end tile
+ !$omp end parallel do
+ end function
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+ integer :: result
+
+ result = compute_sum1 ()
+ if (result .ne. 16) then
+ stop 1
+ end if
+
+ result = compute_sum2 ()
+ if (result .ne. 16) then
+ stop 2
+ end if
+
+ result = compute_sum3 ()
+ if (result .ne. 16) then
+ stop 3
+ end if
+
+ result = compute_sum4 ()
+ if (result .ne. 16) then
+ stop 4
+ end if
+
+ result = compute_sum5 ()
+ if (result .ne. 16) then
+ stop 5
+ end if
+end program
@@ -0,0 +1,108 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c(n, m))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(10)
+ !$omp tile sizes(1, 3)
+ do i = 1,10
+ do j = 1,n
+ do k = 1, n
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ end function mult
+
+ function mult2 (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c(n, m))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(2)
+ !$omp tile sizes(1,2)
+ do i = 1,10
+ do j = 1,n
+ do k = 1, n
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ end function mult2
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+ integer, allocatable :: a(:,:), b(:,:), c(:,:), d(:, :)
+ integer :: i, j
+
+ allocate(a(n, m))
+ allocate(b(n, m))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ d = mult (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (d)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. d(i,j)) stop 1
+ end do
+ end do
+
+ c = mult2 (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) stop 2
+ end do
+ end do
+
+end program main
@@ -0,0 +1,71 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+
+ function copy (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c(n, m))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(2)
+ !$omp tile sizes (1,5)
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = c(j,i) + a(j, i)
+ end do
+ end do
+ end function copy
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a(n, m))
+ allocate(b(n, m))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = 1
+ end do
+ end do
+
+ c = copy (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (c(i,j) .ne. a(i,j)) stop 1
+ end do
+ end do
+
+end program main
@@ -0,0 +1,75 @@
+module matrix
+ implicit none
+ integer :: n = 4
+ integer :: m = 4
+
+contains
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c(n, m))
+ !$omp parallel do private(inner, j, k)
+ do i = 1,m
+ !$omp unroll partial(4)
+ !$omp tile sizes (5)
+ do j = 1,n
+ do k = 1, n
+ if (k == 1) then
+ inner = 0
+ endif
+ inner = inner + a(k, i) * b(j, k)
+ if (k == n) then
+ c(j, i) = inner
+ endif
+ end do
+ end do
+ end do
+ end function mult
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a(n, m))
+ allocate(b(n, m))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ c = mult (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) stop 1
+ end do
+ end do
+
+end program main
@@ -0,0 +1,74 @@
+module matrix
+ implicit none
+ integer :: n = 4
+ integer :: m = 4
+
+contains
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c(n, m))
+ do i = 1,m
+ do j = 1,n
+ c(j, i) = 0
+ end do
+ end do
+
+ !$omp parallel do private(j, k)
+ do i = 1,m
+ !$omp tile sizes (5,2)
+ do j = 1,n
+ do k = 1, n
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ end function mult
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a(n, m))
+ allocate(b(n, m))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ c = mult (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) stop 1
+ end do
+ end do
+
+end program main
@@ -0,0 +1,49 @@
+! { dg-do run }
+
+module test_functions
+ contains
+ integer function compute_sum() result(sum)
+ implicit none
+ integer :: i,j
+
+ sum = 0
+ !$omp parallel do reduction(+:sum) private(j)
+ do i = 1,10,3
+ !$omp unroll full
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+
+ integer function compute_sum2() result(sum)
+ implicit none
+ integer :: i,j
+
+ sum = 0
+ !$omp parallel do reduction(+:sum) private(i, j)
+ !$omp unroll partial(2)
+ do i = 1,10,3
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+
+ integer :: result
+
+ result = compute_sum ()
+ if (result .ne. 16) then
+ stop 1
+ end if
+
+ result = compute_sum2 ()
+ if (result .ne. 16) then
+ stop 2
+ end if
+end program
@@ -0,0 +1,76 @@
+! { dg-do run }
+! { dg-additional-options "-g" }
+
+module test_functions
+contains
+ integer function compute_sum1 () result(sum)
+ implicit none
+ integer :: i
+
+ sum = 0
+ !$omp unroll full
+ do i = 1,10,3
+ sum = sum + 1
+ end do
+ end function compute_sum1
+
+ integer function compute_sum2() result(sum)
+ implicit none
+ integer :: i
+
+ sum = 0
+ !$omp unroll full
+ do i = -20,1,3
+ sum = sum + 1
+ end do
+ end function compute_sum2
+
+ integer function compute_sum3() result(sum)
+ implicit none
+ integer :: i
+
+ sum = 0
+ !$omp unroll full
+ do i = 30,1,-3
+ sum = sum + 1
+ end do
+ end function compute_sum3
+
+ integer function compute_sum4() result(sum)
+ implicit none
+ integer :: i
+
+ sum = 0
+ !$omp unroll full
+ do i = 50,-60,-10
+ sum = sum + 1
+ end do
+ end function compute_sum4
+
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+ integer :: result
+
+ result = compute_sum1 ()
+ if (result .ne. 4) then
+ stop 1
+ end if
+
+ result = compute_sum2 ()
+ if (result .ne. 8) then
+ stop 2
+ end if
+
+ result = compute_sum3 ()
+ if (result .ne. 10) then
+ stop 3
+ end if
+
+ result = compute_sum4 ()
+ if (result .ne. 12) then
+ stop 4
+ end if
+end program
@@ -0,0 +1,56 @@
+! Test lowering of the internal representation of "omp unroll" loops
+! which are not unrolled.
+
+! { dg-do run }
+
+module test_functions
+contains
+ integer function compute_sum1 () result(sum)
+ implicit none
+ integer :: i
+
+ sum = 0
+ !$omp unroll
+ do i = 0,50
+ sum = sum + 1
+ end do
+ end function compute_sum1
+
+ integer function compute_sum3 (step,n) result(sum)
+ implicit none
+ integer :: i, step, n
+
+ sum = 0
+ !$omp unroll
+ do i = 0,n,step
+ sum = sum + 1
+ end do
+ end function compute_sum3
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+
+ integer :: result
+
+ result = compute_sum1 ()
+ if (result .ne. 51) then
+ stop 1
+ end if
+
+ result = compute_sum3 (1, 100)
+ if (result .ne. 101) then
+ stop 2
+ end if
+
+ result = compute_sum3 (2, 100)
+ if (result .ne. 51) then
+ stop 3
+ end if
+
+ result = compute_sum3 (-2, -100)
+ if (result .ne. 51) then
+ stop 4
+ end if
+end program
@@ -0,0 +1,63 @@
+! { dg-do run }
+! { dg-additional-options "-g" }
+
+module test_functions
+contains
+ integer function compute_sum1 () result(sum)
+ implicit none
+ integer :: i
+
+ sum = 0
+ !$omp unroll partial(2)
+ do i = 1,50
+ sum = sum + 1
+ end do
+ end function compute_sum1
+
+ integer function compute_sum3 (step,n) result(sum)
+ implicit none
+ integer :: i, step, n
+
+ sum = 0
+ !$omp unroll partial(5)
+ do i = 1,n,step
+ sum = sum + 1
+ end do
+ end function compute_sum3
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+ integer :: result
+
+ result = compute_sum1 ()
+ if (result .ne. 50) then
+ stop 1
+ end if
+
+ result = compute_sum3 (1, 100)
+ if (result .ne. 100) then
+ stop 2
+ end if
+
+ result = compute_sum3 (1, 9)
+ if (result .ne. 9) then
+ stop 3
+ end if
+
+ result = compute_sum3 (2, 96)
+ if (result .ne. 48) then
+ stop 4
+ end if
+
+ result = compute_sum3 (-2, -98)
+ if (result .ne. 50) then
+ stop 5
+ end if
+
+ result = compute_sum3 (-2, -100)
+ if (result .ne. 51) then
+ stop 6
+ end if
+end program
@@ -0,0 +1,48 @@
+! { dg-do run }
+! { dg-additional-options "-g" }
+
+module test_functions
+contains
+ integer function compute_sum4 (step,n) result(sum)
+ implicit none
+ integer :: i, step, n
+
+ sum = 0
+ !$omp parallel do reduction(+:sum) private(i)
+ !$omp unroll partial(5)
+ do i = 1,n,step
+ sum = sum + 1
+ end do
+ end function compute_sum4
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+ integer :: result
+
+ result = compute_sum4 (1, 100)
+ if (result .ne. 100) then
+ stop 1
+ end if
+
+ result = compute_sum4 (1, 9)
+ if (result .ne. 9) then
+ stop 2
+ end if
+
+ result = compute_sum4 (2, 96)
+ if (result .ne. 48) then
+ stop 3
+ end if
+
+ result = compute_sum4 (-2, -98)
+ if (result .ne. 50) then
+ stop 4
+ end if
+
+ result = compute_sum4 (-2, -100)
+ if (result .ne. 51) then
+ stop 5
+ end if
+end program
@@ -0,0 +1,102 @@
+! { dg-do run }
+! { dg-additional-options "-g" }
+
+module test_functions
+contains
+ integer function compute_sum4 (step,n) result(sum)
+ implicit none
+ integer :: i, step, n
+
+ sum = 0
+ !$omp parallel do reduction(+:sum) lastprivate(i)
+ !$omp unroll partial(5)
+ do i = 1,n,step
+ sum = sum + 1
+ end do
+ end function compute_sum4
+
+ integer function compute_sum5 (step,n) result(sum)
+ implicit none
+ integer :: i, step, n
+
+ sum = 0
+ !$omp parallel do reduction(+:sum) lastprivate(i)
+ !$omp unroll partial(5)
+ !$omp unroll partial(10)
+ do i = 1,n,step
+ sum = sum + 1
+ end do
+ end function compute_sum5
+
+ integer function compute_sum6 (step,n) result(sum)
+ implicit none
+ integer :: i, j, step, n
+
+ sum = 0
+ !$omp parallel do reduction(+:sum) lastprivate(i) &
+ !$omp & private(j)
+ do i = 1,n,step
+ !$omp unroll full
+ !$omp unroll partial(10)
+ do j = 1, 1000
+ sum = sum + 1
+ end do
+ end do
+ end function compute_sum6
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+ integer :: result
+
+ result = compute_sum4 (1, 100)
+ if (result .ne. 100) then
+ stop 1
+ end if
+
+ result = compute_sum4 (1, 9)
+ if (result .ne. 9) then
+ stop 2
+ end if
+
+ result = compute_sum4 (2, 96)
+ if (result .ne. 48) then
+ stop 3
+ end if
+
+ result = compute_sum4 (-2, -98)
+ if (result .ne. 50) then
+ stop 4
+ end if
+
+ result = compute_sum4 (-2, -100)
+ if (result .ne. 51) then
+ stop 5
+ end if
+
+ result = compute_sum5 (1, 100)
+ if (result .ne. 100) then
+ stop 6
+ end if
+
+ result = compute_sum5 (1, 9)
+ if (result .ne. 9) then
+ stop 7
+ end if
+
+ result = compute_sum5 (2, 96)
+ if (result .ne. 48) then
+ stop 8
+ end if
+
+ result = compute_sum5 (-2, -98)
+ if (result .ne. 50) then
+ stop 9
+ end if
+
+ result = compute_sum5 (-2, -100)
+ if (result .ne. 51) then
+ stop 10
+ end if
+end program
@@ -0,0 +1,7 @@
+! { dg-do run }
+! { dg-additional-options "-g -cpp" }
+
+! Check an unroll factor that divides the number of iterations
+! of the loops in the test implementation.
+#define UNROLL_FACTOR 5
+#include "unroll-7.f90"
@@ -0,0 +1,7 @@
+! { dg-do run }
+! { dg-additional-options "-g -cpp" }
+
+! Check an unroll factor that does not divide the number of iterations
+! of the loops in the test implementation.
+#define UNROLL_FACTOR 3
+#include "unroll-7.f90"
@@ -0,0 +1,7 @@
+! { dg-do run }
+! { dg-additional-options "-g -cpp" }
+
+! Check an unroll factor that is larger than the number of iterations
+! of the loops in the test implementation.
+#define UNROLL_FACTOR 113
+#include "unroll-7.f90"
@@ -0,0 +1,186 @@
+! { dg-do run }
+! { dg-additional-options "-cpp" }
+
+#ifndef UNROLL_FACTOR
+#define UNROLL_FACTOR 1
+#endif
+module test_functions
+contains
+ subroutine copy (array1, array2)
+ implicit none
+ integer :: array1(:)
+ integer :: array2(:)
+ integer :: i
+
+ !$omp parallel do private(i)
+ !$omp unroll partial(UNROLL_FACTOR)
+ do i = 1, 100
+ array1(i) = array2(i)
+ end do
+ end subroutine
+
+ subroutine copy2 (array1, array2)
+ implicit none
+
+ integer :: array1(100)
+ integer :: array2(100)
+ integer :: i
+
+ !$omp parallel do private(i)
+ !$omp unroll partial(UNROLL_FACTOR)
+ do i = 0,99
+ array1(i+1) = array2(i+1)
+ end do
+ end subroutine copy2
+
+ subroutine copy3 (array1, array2)
+ implicit none
+
+ integer :: array1(100)
+ integer :: array2(100)
+ integer :: i
+
+ !$omp parallel do lastprivate(i)
+ !$omp unroll partial(UNROLL_FACTOR)
+ do i = -49,50
+ if (i < 0) then
+ array1((-1)*i) = array2((-1)*i)
+ else
+ array1(50+i) = array2(50+i)
+ endif
+ end do
+ end subroutine copy3
+
+ subroutine copy4 (array1, array2)
+ implicit none
+ integer :: array1(:)
+ integer :: array2(:)
+ integer :: i
+
+ !$omp parallel do private(i)
+ !$omp unroll partial(UNROLL_FACTOR)
+ do i = 2, 200, 2
+ array1(i/2) = array2(i/2)
+ end do
+ end subroutine copy4
+
+ subroutine copy5 (array1, array2)
+ implicit none
+ integer :: array1(:)
+ integer :: array2(:)
+ integer :: i
+
+ !$omp parallel do private(i)
+ !$omp unroll partial(UNROLL_FACTOR)
+ do i = 200, 2, -2
+ array1(i/2) = array2(i/2)
+ end do
+ end subroutine
+
+ subroutine copy6 (array1, array2, lower, upper, step)
+ implicit none
+ integer :: array1(:)
+ integer :: array2(:)
+ integer :: lower, upper, step
+ integer :: i
+
+ !$omp parallel do private(i)
+ !$omp unroll partial(UNROLL_FACTOR)
+ do i = lower, upper, step
+ array1 (i) = array2(i)
+ end do
+ end subroutine
+
+ subroutine prepare (array1, array2)
+ implicit none
+ integer :: array1(:)
+ integer :: array2(:)
+
+ array1 = 2
+ array2 = 0
+ end subroutine
+
+ subroutine check_equal (array1, array2)
+ implicit none
+ integer :: array1(:)
+ integer :: array2(:)
+ integer :: i
+
+ do i=1,100
+ if (array1(i) /= array2(i)) then
+ stop 1
+ end if
+ end do
+ end subroutine
+
+ subroutine check_equal_at_steps (array1, array2, lower, upper, step)
+ implicit none
+ integer :: array1(:)
+ integer :: array2(:)
+ integer :: lower, upper, step
+ integer :: i
+
+ do i=lower, upper, step
+ if (array1(i) /= array2(i)) then
+ stop 2
+ end if
+ end do
+ end subroutine
+
+ subroutine check_unchanged_at_non_steps (array1, array2, lower, upper, step)
+ implicit none
+ integer :: array1(:)
+ integer :: array2(:)
+ integer :: lower, upper, step
+ integer :: i, j
+
+ do i=lower, upper,step
+ do j=i,i+step-1
+ if (array2(j) /= 0) then
+ stop 3
+ end if
+ end do
+ end do
+ end subroutine
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+ integer :: array1(100), array2(100)
+
+ call prepare (array1, array2)
+ call copy (array1, array2)
+ call check_equal (array1, array2)
+
+ call prepare (array1, array2)
+ call copy2 (array1, array2)
+ call check_equal (array1, array2)
+
+ call prepare (array1, array2)
+ call copy3 (array1, array2)
+ call check_equal (array1, array2)
+
+ call prepare (array1, array2)
+ call copy4 (array1, array2)
+ call check_equal (array1, array2)
+
+ call prepare (array1, array2)
+ call copy5 (array1, array2)
+ call check_equal (array1, array2)
+
+ call prepare (array1, array2)
+ call copy6 (array1, array2, 1, 100, 5)
+ call check_equal_at_steps (array1, array2, 1, 100, 5)
+ call check_unchanged_at_non_steps (array1, array2, 1, 100, 5)
+
+ call prepare (array1, array2)
+ call copy6 (array1, array2, 1, 50, 5)
+ call check_equal_at_steps (array1, array2, 1, 50, 5)
+ call check_unchanged_at_non_steps (array1, array2, 1, 50, 5)
+
+ call prepare (array1, array2)
+ call copy6 (array1, array2, 3, 18, 7)
+ call check_equal_at_steps (array1, array2, 3 , 18, 7)
+ call check_unchanged_at_non_steps (array1, array2, 3, 18, 7)
+end program
@@ -0,0 +1,35 @@
+! { dg-do run }
+! { dg-additional-options "-g" }
+
+module test_functions
+contains
+ subroutine copy (array1, array2, step, n)
+ implicit none
+ integer :: array1(n)
+ integer :: array2(n)
+ integer :: i, step, n
+
+ call omp_set_num_threads (4)
+ !$omp parallel do shared(array1) shared(array2) schedule(static, 4) &
+ !$omp & private(i)
+ !$omp unroll partial(2)
+ do i = 1,n
+ array1(i) = array2(i)
+ end do
+ end subroutine
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+ integer :: array1(100), array2(100)
+ integer :: i
+
+ array1 = 2
+ call copy(array1, array2, 1, 100)
+ do i=1,100
+ if (array1(i) /= array2(i)) then
+ stop 1
+ end if
+ end do
+end program
@@ -0,0 +1,30 @@
+! { dg-do run }
+! { dg-options "-fno-openmp -fopenmp-simd" }
+
+module test_functions
+ contains
+ integer function compute_sum() result(sum)
+ implicit none
+ integer :: i,j
+
+ sum = 0
+ !$omp simd reduction(+:sum)
+ do i = 1,10,3
+ !$omp unroll full
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function compute_sum
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+ integer :: result
+
+ result = compute_sum ()
+ if (result .ne. 16) then
+ stop 1
+ end if
+end program
@@ -0,0 +1,108 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(10)
+ !$omp tile sizes(1, 3)
+ do i = 1,10
+ do j = 1,n
+ do k = 1, n
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ end function mult
+
+ function mult2 (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c(n, m))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(2)
+ !$omp tile sizes(1,2)
+ do i = 1,10
+ do j = 1,n
+ do k = 1, n
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ end function mult2
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+ integer, allocatable :: a(:,:),b(:,:),c(:,:),d(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ d = mult (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (d)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. d(i,j)) stop 1
+ end do
+ end do
+
+ c = mult2 (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) stop 2
+ end do
+ end do
+
+end program main
@@ -0,0 +1,70 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+
+ function copy (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c(n, m))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(2)
+ !$omp tile sizes (1,5)
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = c(j,i) + a(j, i)
+ end do
+ end do
+ end function copy
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+end module matrix
+
+program main
+ use matrix
+ implicit none
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a(n, m))
+ allocate(b(n, m))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = 1
+ end do
+ end do
+
+ c = copy (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (c(i,j) .ne. a(i,j)) stop 1
+ end do
+ end do
+
+end program main