commit 40193f49480f0a0b750d15049d29fd427282c5f0
Author: Julian Brown <julian@codesourcery.com>
Date: Tue Jun 16 03:50:55 2015 -0700
New set of private variable/state propagation tests.
new file mode 100644
@@ -0,0 +1,38 @@
+#include <assert.h>
+
+/* Test of gang-private variables declared in local scope with parallel
+ directive. */
+
+#if defined(ACC_DEVICE_TYPE_host) || defined(ACC_DEVICE_TYPE_host_nonshm)
+#define ACTUAL_GANGS 1
+#else
+#define ACTUAL_GANGS 32
+#endif
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[ACTUAL_GANGS];
+
+ for (i = 0; i < ACTUAL_GANGS; i++)
+ arr[i] = 3;
+
+ #pragma acc parallel copy(arr) num_gangs(ACTUAL_GANGS) num_workers(8) \
+ vector_length(32)
+ {
+ int x;
+
+ #pragma acc loop gang(static:1)
+ for (i = 0; i < ACTUAL_GANGS; i++)
+ x = i * 2;
+
+ #pragma acc loop gang(static:1)
+ for (i = 0; i < ACTUAL_GANGS; i++)
+ arr[i] += x;
+ }
+
+ for (i = 0; i < ACTUAL_GANGS; i++)
+ assert (arr[i] == 3 + i * 2);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,56 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Back-to-back worker loops. */
+
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int x = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Successive vector loops. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+
+ x = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,57 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Aggregate worker variable. */
+
+typedef struct
+{
+ int x, y;
+} vec2;
+
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ vec2 pt;
+
+ pt.x = i ^ j * 3;
+ pt.y = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.x * k;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.y * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,60 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Addressable worker variable. */
+
+typedef struct
+{
+ int x, y;
+} vec2;
+
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ vec2 pt, *ptp;
+
+ ptp = &pt;
+
+ pt.x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += ptp->x * k;
+
+ ptp->y = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.y * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,53 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Array worker variable. */
+
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int pt[2];
+
+ pt[0] = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+ pt[1] = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[1] * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,27 @@
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32];
+
+ for (i = 0; i < 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(x)
+ for (i = 0; i < 32; i++)
+ {
+ x = i * 2;
+ arr[i] += x;
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == i * 3);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+ to partitioned workers. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(x)
+ for (i = 0; i < 32; i++)
+ {
+ x = i * 2;
+
+ #pragma acc loop worker
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x;
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 2);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,31 @@
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+ to partitioned vectors. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(x)
+ for (i = 0; i < 32; i++)
+ {
+ x = i * 2;
+
+ #pragma acc loop vector
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x;
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 2);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of gang-private addressable variable declared on loop directive, with
+ broadcasting to partitioned workers. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(x)
+ for (i = 0; i < 32; i++)
+ {
+ int *p = &x;
+
+ x = i * 2;
+
+ #pragma acc loop worker
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x;
+
+ (*p)--;
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 2);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of gang-private array variable declared on loop directive, with
+ broadcasting to partitioned workers. */
+
+int
+main (int argc, char* argv[])
+{
+ int x[8], i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(x)
+ for (i = 0; i < 32; i++)
+ {
+ for (int j = 0; j < 8; j++)
+ x[j] = j * 2;
+
+ #pragma acc loop worker
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x[j % 8];
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i % 8) * 2);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of gang-private aggregate variable declared on loop directive, with
+ broadcasting to partitioned workers. */
+
+typedef struct {
+ int x, y, z;
+ int attr[13];
+} vec3;
+
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32];
+ vec3 pt;
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(pt)
+ for (i = 0; i < 32; i++)
+ {
+ pt.x = i;
+ pt.y = i * 2;
+ pt.z = i * 4;
+ pt.attr[5] = i * 6;
+
+ #pragma acc loop worker
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5];
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 13);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,51 @@
+#include <assert.h>
+
+/* Test of vector-private variables declared on loop directive. */
+
+int
+main (int argc, char* argv[])
+{
+ int x, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ #pragma acc loop vector private(x)
+ for (k = 0; k < 32; k++)
+ {
+ x = i ^ j * 3;
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+
+ #pragma acc loop vector private(x)
+ for (k = 0; k < 32; k++)
+ {
+ x = i | j * 5;
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,46 @@
+#include <assert.h>
+
+/* Test of vector-private variables declared on loop directive. Array type. */
+
+int
+main (int argc, char* argv[])
+{
+ int pt[2], i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ #pragma acc loop vector private(pt)
+ for (k = 0; k < 32; k++)
+ {
+ pt[0] = i ^ j * 3;
+ pt[1] = i | j * 5;
+ arr[i * 1024 + j * 32 + k] += pt[0] * k;
+ arr[i * 1024 + j * 32 + k] += pt[1] * k;
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,36 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ x = i ^ j * 3;
+ /* Try to ensure 'x' accesses doesn't get optimized into a
+ temporary. */
+ __asm__ __volatile__ ("");
+ arr[i * 32 + j] += x;
+ }
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + ((i / 32) ^ (i % 32) * 3));
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,56 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Back-to-back worker loops. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Successive vector loops. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+
+ x = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,53 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Addressable worker variable. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int *p = &x;
+
+ x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+
+ *p = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,57 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Aggregate worker variable. */
+
+typedef struct
+{
+ int x, y;
+} vec2;
+
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+ vec2 pt;
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(pt)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ pt.x = i ^ j * 3;
+ pt.y = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.x * k;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.y * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,56 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on loop directive, broadcasting
+ to vector-partitioned mode. Array worker variable. */
+
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+ int pt[2];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ /* "pt" is treated as "present_or_copy" on the parallel directive because it
+ is an array variable. */
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ /* But here, it is made private per-worker. */
+ #pragma acc loop worker private(pt)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ pt[0] = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+ pt[1] = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[1] * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,25 @@
+#include <assert.h>
+
+/* Basic test of firstprivate variable. */
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32];
+
+ for (i = 0; i < 32; i++)
+ arr[i] = 3;
+
+ #pragma acc parallel firstprivate(x) copy(arr) num_gangs(32) num_workers(8) \
+ vector_length(32)
+ {
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ arr[i] += x;
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == 8);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+#include <assert.h>
+
+/* Test of gang-private variables declared on the parallel directive. */
+
+#if defined(ACC_DEVICE_TYPE_host) || defined(ACC_DEVICE_TYPE_host_nonshm)
+#define ACTUAL_GANGS 1
+#else
+#define ACTUAL_GANGS 32
+#endif
+
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[ACTUAL_GANGS];
+
+ for (i = 0; i < ACTUAL_GANGS; i++)
+ arr[i] = 3;
+
+ #pragma acc parallel private(x) copy(arr) num_gangs(ACTUAL_GANGS) \
+ num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang(static:1)
+ for (i = 0; i < ACTUAL_GANGS; i++)
+ x = i * 2;
+
+ #pragma acc loop gang(static:1)
+ for (i = 0; i < ACTUAL_GANGS; i++)
+ arr[i] += x;
+ }
+
+ for (i = 0; i < ACTUAL_GANGS; i++)
+ assert (arr[i] == 3 + i * 2);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of gang-private array variable declared on the parallel directive. */
+
+int
+main (int argc, char* argv[])
+{
+ int x[32], i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(2) \
+ vector_length(32)
+ {
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ int j;
+ for (j = 0; j < 32; j++)
+ x[j] = j * 2;
+
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ arr[i * 32 + j] += x[31 - j];
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (31 - (i % 32)) * 2);
+
+ return 0;
+}