[nvptx] Fix propagation of branch cond in vector-worker-neutered code
2018-04-12 Tom de Vries <tom@codesourcery.com>
PR target/85246
* config/nvptx/nvptx.c (nvptx_single): Don't use partitioning when
propagating branch condition calculated in vector-worker-neutered code.
* testsuite/libgomp.oacc-fortran/gemm.f90: Use
-foffload=-mlong-vector-in-workers.
---
gcc/config/nvptx/nvptx.c | 3 ++-
libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
@@ -4306,13 +4306,14 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
broadcast_data_t data;
unsigned size = GET_MODE_SIZE (SImode);
bool vector = (GOMP_DIM_MASK (GOMP_DIM_VECTOR) == mask) != 0;
+ bool worker = (GOMP_DIM_MASK (GOMP_DIM_WORKER) == mask) != 0;
rtx barrier = GEN_INT (0);
int threads = 0;
data.base = oacc_bcast_sym;
data.ptr = 0;
- bool use_partitioning_p = (vector
+ bool use_partitioning_p = (vector && !worker
&& nvptx_mach_max_workers () > 1
&& cfun->machine->bcast_partition);
if (use_partitioning_p)
@@ -1,6 +1,7 @@
! Exercise three levels of parallelism using SGEMM from BLAS.
! { dg-additional-options "-fopenacc-dim=-:-:128" }
+! { dg-additional-options "-foffload=-mlong-vector-in-workers" }
! Implicitly set vector_length to 128 using -fopenacc-dim.
subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c)