[nvptx] Fix neutering of bb with only cond jump
2018-04-05 Tom de Vries <tom@codesourcery.com>
PR target/85204
* config/nvptx/nvptx.c (nvptx_single): Fix neutering of bb with only
cond jump.
* testsuite/libgomp.oacc-c-c++-common/broadcast-1.c: New test.
---
gcc/config/nvptx/nvptx.c | 6 ++-
.../libgomp.oacc-c-c++-common/broadcast-1.c | 49 ++++++++++++++++++++++
2 files changed, 54 insertions(+), 1 deletion(-)
@@ -4048,6 +4048,7 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
/* Insert the vector test inside the worker test. */
unsigned mode;
rtx_insn *before = tail;
+ rtx_insn *neuter_start = NULL;
for (mode = GOMP_DIM_WORKER; mode <= GOMP_DIM_VECTOR; mode++)
if (GOMP_DIM_MASK (mode) & skip_mask)
{
@@ -4065,7 +4066,10 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
br = gen_br_true (pred, label);
else
br = gen_br_true_uni (pred, label);
- emit_insn_before (br, head);
+ if (neuter_start)
+ neuter_start = emit_insn_after (br, neuter_start);
+ else
+ neuter_start = emit_insn_before (br, head);
LABEL_NUSES (label)++;
if (tail_branch)
new file mode 100644
@@ -0,0 +1,49 @@
+/* Ensure that the state of a conditional expression guarding
+   worker-vector loops is properly handled by the nvptx backend.  */
+
+#include <assert.h>
+#include <math.h>
+
+
+#define N 1024
+
+int A[N][N] ;
+
+void test(int x) /* Fill every element of A with 1 if X == 1, else with -1.  */
+{
+#pragma acc parallel num_gangs(16) num_workers(4) vector_length(32) copyout(A)
+ { /* A is listed in copyout, so main can inspect it afterwards.  */
+#pragma acc loop gang
+ for(int j=0;j<N;j++) /* J selects the column written in this iteration.  */
+ {
+ if (x==1) /* The branch sits outside both worker-vector loops.  */
+ {
+#pragma acc loop worker vector
+ for(int i=0;i<N;i++) /* I walks the rows of column J.  */
+ A[i][j] = 1;
+ }
+ else
+ {
+#pragma acc loop worker vector
+ for(int i=0;i<N;i++) /* Same traversal as above, other value.  */
+ A[i][j] = -1;
+ }
+ }
+ }
+}
+
+
+int main(void)
+{
+  int row, col;
+  test (0);  /* X != 1: every element of A must be -1.  */
+  for (row = 0; row < N; row++)
+    for (col = 0; col < N; col++)
+      assert (A[row][col] == -1);
+  test (1);  /* X == 1: every element of A must be 1.  */
+  for (row = 0; row < N; row++)
+    for (col = 0; col < N; col++)
+      assert (A[row][col] == 1);
+
+  return 0;
+}