2016-08-30 Nathan Sidwell <nathan@codesourcery.com>
* plugin/plugin-nvptx.c (nvptx_exec): Interrogate board attributes
to determine default geometry.
===================================================================
@@ -938,14 +938,42 @@ nvptx_exec (void (*fn), size_t mapnum, v
}
}
- /* Do some sanity checking. The CUDA API doesn't appear to
- provide queries to determine these limits. */
+ int warp_size, block_size, dev_size, cpu_size;
+ CUdevice dev = nvptx_thread()->ptx_dev->dev;
+ /* 32 is the default for known hardware. */
+ int gang = 0, worker = 32, vector = 32;
+
+ if (CUDA_SUCCESS == cuDeviceGetAttribute
+ (&block_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev)
+ && CUDA_SUCCESS == cuDeviceGetAttribute
+ (&warp_size, CU_DEVICE_ATTRIBUTE_WARP_SIZE, dev)
+ && CUDA_SUCCESS == cuDeviceGetAttribute
+ (&dev_size, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev)
+ && CUDA_SUCCESS == cuDeviceGetAttribute
+ (&cpu_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, dev))
+ {
+ GOMP_PLUGIN_debug (0, " warp_size=%d, block_size=%d,"
+ " dev_size=%d, cpu_size=%d\n",
+ warp_size, block_size, dev_size, cpu_size);
+ gang = (cpu_size / block_size) * dev_size;
+ worker = block_size / warp_size;
+ vector = warp_size;
+ }
+
+ /* There is no upper bound on the gang size. The best size
+ matches the hardware configuration. Logical gangs are
+ scheduled onto physical hardware. To maximize usage, we
+ should guess a large number. */
if (default_dims[GOMP_DIM_GANG] < 1)
- default_dims[GOMP_DIM_GANG] = 32;
+ default_dims[GOMP_DIM_GANG] = gang ? gang : 1024;
+ /* The worker size must not exceed the hardware. */
if (default_dims[GOMP_DIM_WORKER] < 1
- || default_dims[GOMP_DIM_WORKER] > 32)
- default_dims[GOMP_DIM_WORKER] = 32;
- default_dims[GOMP_DIM_VECTOR] = 32;
+ || (default_dims[GOMP_DIM_WORKER] > worker && gang))
+ default_dims[GOMP_DIM_WORKER] = worker;
+ /* The vector size must exactly match the hardware. */
+ if (default_dims[GOMP_DIM_VECTOR] < 1
+ || (default_dims[GOMP_DIM_VECTOR] != vector && gang))
+ default_dims[GOMP_DIM_VECTOR] = vector;
GOMP_PLUGIN_debug (0, " default dimensions [%d,%d,%d]\n",
default_dims[GOMP_DIM_GANG],