@@ -82,6 +82,8 @@ typedef enum {
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
} CUdevice_attribute;
@@ -72,7 +72,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c
+ oacc-target.c target-indirect.c selector.c
include $(top_srcdir)/plugin/Makefrag.am
@@ -219,7 +219,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
- oacc-target.lo target-indirect.lo $(am__objects_1)
+ oacc-target.lo target-indirect.lo selector.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -552,7 +552,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c $(am__append_3)
+ oacc-target.c target-indirect.c selector.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -777,6 +777,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scope.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/selector.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
new file mode 100644
@@ -0,0 +1,57 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+ an AMD GCN GPU. */
+
+#include "libgomp.h"
+#include <string.h>
+
+/* Return true if the AMD GCN device executing this code matches the given
+   selector traits.  KIND, ARCH and ISA are each either NULL (the trait is
+   not checked) or a NUL-terminated trait name.  Only kind "gpu" and arch
+   "gcn" are accepted.  ISA is compared against the ISA names belonging to
+   the GCN generation this file is compiled for (__GCN3__ or __GCN5__).  */
+
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+			      const char *isa)
+{
+  if (kind && strcmp (kind, "gpu") != 0)
+    return false;
+
+  if (arch && strcmp (arch, "gcn") != 0)
+    return false;
+
+  /* No ISA requested: kind and arch alone decide the match.  */
+  if (!isa)
+    return true;
+
+#ifdef __GCN3__
+  if (strcmp (isa, "fiji") == 0 || strcmp (isa, "gfx803") == 0)
+    return true;
+#endif
+
+#ifdef __GCN5__
+  /* Every comparison must test for equality; testing any of them for
+     inequality would accept nearly every ISA string.  */
+  if (strcmp (isa, "gfx900") == 0 || strcmp (isa, "gfx906") == 0
+      || strcmp (isa, "gfx908") == 0)
+    return true;
+#endif
+
+  return false;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains a generic implementation of
+ GOMP_evaluate_current_device when run on a Linux host. */
+
+#include <string.h>
+#include "libgomp.h"
+
+/* Generic host check: report whether the selector traits KIND, ARCH and
+   ISA (each NULL when unspecified) match the device running this code.
+   KIND must be unspecified or "cpu".  No architecture or ISA name is
+   recognized here, so specifying either one makes the match fail.  */
+
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+			      const char *isa)
+{
+  bool kind_ok = kind == NULL || strcmp (kind, "cpu") == 0;
+
+  return kind_ok && arch == NULL && isa == NULL;
+}
new file mode 100644
@@ -0,0 +1,325 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+ an x86/x64-based Linux host. */
+
+#include <string.h>
+#include "libgomp.h"
+
+/* Return true if NAME equals one of the strings in LIST, an array of
+   names terminated by a NULL entry.  */
+
+static bool
+selector_name_in_list (const char *name, const char *const *list)
+{
+  for (; *list != NULL; list++)
+    if (strcmp (name, *list) == 0)
+      return true;
+  return false;
+}
+
+/* Report whether the selector traits KIND, ARCH and ISA (each NULL when
+   unspecified) match the x86 host this code was compiled for.  KIND must
+   be unspecified or "cpu".  ARCH and ISA are looked up in tables whose
+   entries are enabled by the compiler's predefined macros, so the answer
+   reflects the options this file was built with.  */
+
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+			      const char *isa)
+{
+  /* Architecture names accepted for this build.  */
+  static const char *const arch_names[] = {
+    "x86",
+    "ia32",
+#ifdef __x86_64__
+    "x86_64",
+#endif
+#ifdef __ILP32__
+    "x32",
+#endif
+    "i386",
+    "i486",
+#ifndef __i486__
+    "i586",
+#endif
+#if !defined (__i486__) && !defined (__i586__)
+    "i686",
+#endif
+    NULL
+  };
+
+  /* ISA extension names, each present only when the corresponding
+     feature macro is defined.  */
+  static const char *const isa_names[] = {
+#ifdef __WBNOINVD__
+    "wbnoinvd",
+#endif
+#ifdef __AVX512VP2INTERSECT__
+    "avx512vp2intersect",
+#endif
+#ifdef __MMX__
+    "mmx",
+#endif
+#ifdef __3dNOW__
+    "3dnow",
+#endif
+#ifdef __3dNOW_A__
+    "3dnowa",
+#endif
+#ifdef __SSE__
+    "sse",
+#endif
+#ifdef __SSE2__
+    "sse2",
+#endif
+#ifdef __SSE3__
+    "sse3",
+#endif
+#ifdef __SSSE3__
+    "ssse3",
+#endif
+#ifdef __SSE4_1__
+    "sse4.1",
+#endif
+#ifdef __SSE4_2__
+    "sse4",
+    "sse4.2",
+#endif
+#ifdef __AES__
+    "aes",
+#endif
+#ifdef __SHA__
+    "sha",
+#endif
+#ifdef __PCLMUL__
+    "pclmul",
+#endif
+#ifdef __AVX__
+    "avx",
+#endif
+#ifdef __AVX2__
+    "avx2",
+#endif
+#ifdef __AVX512F__
+    "avx512f",
+#endif
+#ifdef __AVX512ER__
+    "avx512er",
+#endif
+#ifdef __AVX512CD__
+    "avx512cd",
+#endif
+#ifdef __AVX512PF__
+    "avx512pf",
+#endif
+#ifdef __AVX512DQ__
+    "avx512dq",
+#endif
+#ifdef __AVX512BW__
+    "avx512bw",
+#endif
+#ifdef __AVX512VL__
+    "avx512vl",
+#endif
+#ifdef __AVX512VBMI__
+    "avx512vbmi",
+#endif
+#ifdef __AVX512IFMA__
+    "avx512ifma",
+#endif
+#ifdef __AVX5124VNNIW__
+    "avx5124vnniw",
+#endif
+#ifdef __AVX512VBMI2__
+    "avx512vbmi2",
+#endif
+#ifdef __AVX512VNNI__
+    "avx512vnni",
+#endif
+#ifdef __PCONFIG__
+    "pconfig",
+#endif
+#ifdef __SGX__
+    "sgx",
+#endif
+#ifdef __AVX5124FMAPS__
+    "avx5124fmaps",
+#endif
+#ifdef __AVX512BITALG__
+    "avx512bitalg",
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+    "avx512vpopcntdq",
+#endif
+#ifdef __FMA__
+    "fma",
+#endif
+#ifdef __RTM__
+    "rtm",
+#endif
+#ifdef __SSE4A__
+    "sse4a",
+#endif
+#ifdef __FMA4__
+    "fma4",
+#endif
+#ifdef __XOP__
+    "xop",
+#endif
+#ifdef __LWP__
+    "lwp",
+#endif
+#ifdef __ABM__
+    "abm",
+#endif
+#ifdef __BMI__
+    "bmi",
+#endif
+#ifdef __BMI2__
+    "bmi2",
+#endif
+#ifdef __LZCNT__
+    "lzcnt",
+#endif
+#ifdef __TBM__
+    "tbm",
+#endif
+#ifdef __CRC32__
+    "crc32",
+#endif
+#ifdef __POPCNT__
+    "popcnt",
+#endif
+#ifdef __FSGSBASE__
+    "fsgsbase",
+#endif
+#ifdef __RDRND__
+    "rdrnd",
+#endif
+#ifdef __F16C__
+    "f16c",
+#endif
+#ifdef __RDSEED__
+    "rdseed",
+#endif
+#ifdef __PRFCHW__
+    "prfchw",
+#endif
+#ifdef __ADX__
+    "adx",
+#endif
+#ifdef __FXSR__
+    "fxsr",
+#endif
+#ifdef __XSAVE__
+    "xsave",
+#endif
+#ifdef __XSAVEOPT__
+    "xsaveopt",
+#endif
+#ifdef __PREFETCHWT1__
+    "prefetchwt1",
+#endif
+#ifdef __CLFLUSHOPT__
+    "clflushopt",
+#endif
+#ifdef __CLZERO__
+    "clzero",
+#endif
+#ifdef __XSAVEC__
+    "xsavec",
+#endif
+#ifdef __XSAVES__
+    "xsaves",
+#endif
+#ifdef __CLWB__
+    "clwb",
+#endif
+#ifdef __MWAITX__
+    "mwaitx",
+#endif
+#ifdef __PKU__
+    "pku",
+#endif
+#ifdef __RDPID__
+    "rdpid",
+#endif
+#ifdef __GFNI__
+    "gfni",
+#endif
+#ifdef __SHSTK__
+    "shstk",
+#endif
+#ifdef __VAES__
+    "vaes",
+#endif
+#ifdef __VPCLMULQDQ__
+    "vpclmulqdq",
+#endif
+#ifdef __MOVDIRI__
+    "movdiri",
+#endif
+#ifdef __MOVDIR64B__
+    "movdir64b",
+#endif
+#ifdef __WAITPKG__
+    "waitpkg",
+#endif
+#ifdef __CLDEMOTE__
+    "cldemote",
+#endif
+#ifdef __SERIALIZE__
+    "serialize",
+#endif
+#ifdef __PTWRITE__
+    "ptwrite",
+#endif
+#ifdef __AVX512BF16__
+    "avx512bf16",
+#endif
+#ifdef __AVX512FP16__
+    "avx512fp16",
+#endif
+#ifdef __ENQCMD__
+    "enqcmd",
+#endif
+#ifdef __TSXLDTRK__
+    "tsxldtrk",
+#endif
+#ifdef __AMX_TILE__
+    "amx-tile",
+#endif
+#ifdef __AMX_INT8__
+    "amx-int8",
+#endif
+#ifdef __AMX_BF16__
+    "amx-bf16",
+#endif
+#ifdef __LAHF_SAHF__
+    "sahf",
+#endif
+#ifdef __MOVBE__
+    "movbe",
+#endif
+#ifdef __UINTR__
+    "uintr",
+#endif
+#ifdef __HRESET__
+    "hreset",
+#endif
+#ifdef __KL__
+    "kl",
+#endif
+#ifdef __WIDEKL__
+    "widekl",
+#endif
+    NULL
+  };
+
+  if (kind != NULL && strcmp (kind, "cpu") != 0)
+    return false;
+
+  if (arch != NULL && !selector_name_in_list (arch, arch_names))
+    return false;
+
+  /* With no ISA requested, kind and arch alone decide the match.  */
+  if (isa == NULL)
+    return true;
+
+  return selector_name_in_list (isa, isa_names);
+}
new file mode 100644
@@ -0,0 +1,65 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+ a Nvidia GPU. */
+
+#include "libgomp.h"
+#include <string.h>
+
+/* Report whether the selector traits KIND, ARCH and ISA (each NULL when
+   unspecified) match the Nvidia PTX device this code was compiled for.
+   KIND must be unspecified or "gpu" and ARCH unspecified or "nvptx".  ISA
+   is looked up in a table whose entries are enabled according to the
+   value of __PTX_SM__.  */
+
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+			      const char *isa)
+{
+  /* "sm_30" is always accepted; the later entries depend on __PTX_SM__.
+     NOTE(review): there is no "sm_70" entry here although the host-side
+     plugin checks for it -- confirm whether that is intentional.  */
+  static const char *const isa_names[] = {
+    "sm_30",
+#if __PTX_SM__ >= 350
+    "sm_35",
+#endif
+#if __PTX_SM__ >= 530
+    "sm_53",
+#endif
+#if __PTX_SM__ >= 750
+    "sm_75",
+#endif
+#if __PTX_SM__ >= 800
+    "sm_80",
+#endif
+  };
+  size_t idx;
+
+  if (kind != NULL && strcmp (kind, "gpu") != 0)
+    return false;
+
+  if (arch != NULL && strcmp (arch, "nvptx") != 0)
+    return false;
+
+  if (isa == NULL)
+    return true;
+
+  for (idx = 0; idx < sizeof isa_names / sizeof isa_names[0]; idx++)
+    if (strcmp (isa, isa_names[idx]) == 0)
+      return true;
+
+  return false;
+}
@@ -152,6 +152,8 @@ extern int GOMP_OFFLOAD_memcpy3d (int, int, size_t, size_t, size_t, void *,
extern bool GOMP_OFFLOAD_can_run (void *);
extern void GOMP_OFFLOAD_run (int, void *, void *, void **);
extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *);
+/* Plugin hook: return whether the device with the given number (first
+   argument) matches the selector's kind, arch and isa strings (remaining
+   arguments, in that order).  */
+extern bool GOMP_OFFLOAD_evaluate_device (int, const char *, const char *,
+ const char *);
extern void GOMP_OFFLOAD_openacc_exec (void (*) (void *), size_t, void **,
void **, unsigned *, void *);
@@ -1415,6 +1415,7 @@ struct gomp_device_descr
__typeof (GOMP_OFFLOAD_can_run) *can_run_func;
__typeof (GOMP_OFFLOAD_run) *run_func;
__typeof (GOMP_OFFLOAD_async_run) *async_run_func;
+ __typeof (GOMP_OFFLOAD_evaluate_device) *evaluate_device_func;
/* Splay tree containing information about mapped memory regions. */
struct splay_tree_s mem_map;
@@ -414,6 +414,7 @@ GOMP_5.1 {
GOMP_scope_start;
GOMP_warning;
GOMP_teams4;
+ GOMP_evaluate_target_device;
} GOMP_5.0.1;
GOMP_5.1.1 {
@@ -337,6 +337,11 @@ extern void GOMP_single_copy_end (void *);
extern void GOMP_scope_start (uintptr_t *);
+/* selector.c */
+
+/* Return whether the device executing the call matches the given kind,
+   arch and isa selector strings (in that order).  */
+extern bool GOMP_evaluate_current_device (const char *, const char *,
+ const char *);
+
/* target.c */
extern void GOMP_target (int, void (*) (void *), const void *,
@@ -359,6 +364,9 @@ extern void GOMP_teams (unsigned int, unsigned int);
extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);
extern void *GOMP_target_map_indirect_ptr (void *);
+/* Return whether the device with the given number (first argument)
+   matches the kind, arch and isa selector strings (remaining arguments,
+   in that order).  */
+extern bool GOMP_evaluate_target_device (int, const char *, const char *,
+ const char *);
+
/* teams.c */
extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned,
@@ -136,6 +136,16 @@ host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
fn (vars);
}
+/* Host entry for the evaluate_device hook.  The body is marked
+   unreachable: this function only exists so that the dispatch table has a
+   value for the field, and none of its arguments are used.  */
+
+static bool
+host_evaluate_device (int device_num, const char *kind, const char *arch,
+		      const char *isa)
+{
+  (void) device_num;
+  (void) kind;
+  (void) arch;
+  (void) isa;
+
+  __builtin_unreachable ();
+  return false;
+}
+
+
static void
host_openacc_exec (void (*fn) (void *),
size_t mapnum __attribute__ ((unused)),
@@ -285,6 +295,7 @@ static struct gomp_device_descr host_dispatch =
.memcpy2d_func = NULL,
.memcpy3d_func = NULL,
.run_func = host_run,
+ .evaluate_device_func = host_evaluate_device,
.mem_map = { NULL },
.mem_map_rev = { NULL },
@@ -3984,6 +3984,20 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
GOMP_PLUGIN_target_task_completion, async_data);
}
+/* Plugin hook: return whether GCN device DEVICE_NUM matches the selector
+   traits KIND, ARCH and ISA.  Each trait is either NULL (not checked) or
+   a NUL-terminated name.  Only kind "gpu" and arch "gcn" are accepted;
+   ISA is converted with isa_code and compared against the agent's
+   device_isa.  */
+
+bool
+GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
+			      const char *arch, const char *isa)
+{
+  struct agent_info *agent = get_agent_info (device_num);
+
+  if (kind && strcmp (kind, "gpu") != 0)
+    return false;
+  if (arch && strcmp (arch, "gcn") != 0)
+    return false;
+  if (!isa)
+    return true;
+
+  /* NOTE(review): get_agent_info presumably returns NULL when the device
+     lookup fails -- confirm against its definition.  Do not dereference
+     AGENT in that case; a device that cannot be found matches no ISA.  */
+  if (agent == NULL)
+    return false;
+
+  return isa_code (isa) == agent->device_isa;
+}
+
+
/* }}} */
/* {{{ OpenACC Plugin API */
@@ -317,6 +317,7 @@ struct ptx_device
int max_threads_per_block;
int max_threads_per_multiprocessor;
int default_dims[GOMP_DIM_MAX];
+ int compute_major, compute_minor;
/* Length as used by the CUDA Runtime API ('struct cudaDeviceProp'). */
char name[256];
@@ -541,6 +542,14 @@ nvptx_open_device (int n)
for (int i = 0; i != GOMP_DIM_MAX; i++)
ptx_dev->default_dims[i] = 0;
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
+ ptx_dev->compute_major = pi;
+
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
+ ptx_dev->compute_minor = pi;
+
CUDA_CALL_ERET (NULL, cuDeviceGetName, ptx_dev->name, sizeof ptx_dev->name,
dev);
@@ -2312,3 +2321,39 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
}
/* TODO: Implement GOMP_OFFLOAD_async_run. */
+
+/* Return true from the enclosing function if ISA names compute capability
+   MAJOR.MINOR ("sm_<major><minor>") and DEVICE's compute capability is at
+   least that.  Capabilities are ordered as (major, minor) pairs, so the
+   minor numbers are only compared when the major numbers are equal; e.g.
+   a compute 8.0 device satisfies "sm_35".  Expects the local names
+   'device' and 'isa' to be in scope.  */
+#define CHECK_ISA(major, minor)						\
+  do									\
+    {									\
+      if ((device->compute_major > (major)				\
+	   || (device->compute_major == (major)				\
+	       && device->compute_minor >= (minor)))			\
+	  && strcmp (isa, "sm_" #major #minor) == 0)			\
+	return true;							\
+    }									\
+  while (0)
+
+/* Plugin hook: return whether Nvidia device DEVICE_NUM matches the
+   selector traits KIND, ARCH and ISA.  Each trait is either NULL (not
+   checked) or a NUL-terminated name.  Only kind "gpu" and arch "nvptx"
+   are accepted.  ISA matches when it is one of the "sm_XY" names listed
+   below and the device's compute capability, read in nvptx_open_device,
+   is at least X.Y.  */
+
+bool
+GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
+			      const char *arch, const char *isa)
+{
+  if (kind && strcmp (kind, "gpu") != 0)
+    return false;
+  if (arch && strcmp (arch, "nvptx") != 0)
+    return false;
+  if (!isa)
+    return true;
+
+  struct ptx_device *device = ptx_devices[device_num];
+
+  CHECK_ISA (3, 0);
+  CHECK_ISA (3, 5);
+  CHECK_ISA (3, 7);
+  CHECK_ISA (5, 0);
+  CHECK_ISA (5, 2);
+  CHECK_ISA (5, 3);
+  CHECK_ISA (6, 0);
+  CHECK_ISA (6, 1);
+  CHECK_ISA (6, 2);
+  CHECK_ISA (7, 0);
+  CHECK_ISA (7, 2);
+  CHECK_ISA (7, 5);
+  CHECK_ISA (8, 0);
+  CHECK_ISA (8, 6);
+
+  /* Unknown ISA name, or one the device does not support.  */
+  return false;
+}
new file mode 100644
@@ -0,0 +1,36 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains a placeholder implementation of
+ GOMP_evaluate_current_device. */
+
+#include "libgomp.h"
+
+/* Placeholder: no selector information is available for this
+   configuration, so every query reports "no match" regardless of KIND,
+   ARCH and ISA.  */
+
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+			      const char *isa)
+{
+  (void) kind;
+  (void) arch;
+  (void) isa;
+
+  return false;
+}
@@ -5088,6 +5088,43 @@ omp_pause_resource_all (omp_pause_resource_t kind)
ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
+/* Return whether device DEVICE_NUM matches the target_device selector
+   traits KIND, ARCH and ISA.  Each trait is either NULL (not checked) or
+   a NUL-terminated name; the kind "any" is treated as unspecified.  A
+   negative DEVICE_NUM selects the default device.
+
+   If DEVICE_NUM is the device executing this call, the question is
+   answered locally by GOMP_evaluate_current_device.  Otherwise, on the
+   initial device the query is forwarded to the device's evaluate_device
+   hook, and on any other device the result is false.  */
+
+bool
+GOMP_evaluate_target_device (int device_num, const char *kind,
+			     const char *arch, const char *isa)
+{
+  /* Stays true only if the device is found but provides no
+     evaluate_device hook.  */
+  bool result = true;
+
+  if (device_num < 0)
+    device_num = omp_get_default_device ();
+
+  if (kind && strcmp (kind, "any") == 0)
+    kind = NULL;
+
+  /* DEVICE_NUM is a signed int, and any of the trait strings may be NULL,
+     which must not be passed to a %s conversion.  */
+  gomp_debug (1, "%s: device_num = %d, kind=%s, arch=%s, isa=%s",
+	      __FUNCTION__, device_num,
+	      kind ? kind : "(null)",
+	      arch ? arch : "(null)",
+	      isa ? isa : "(null)");
+
+  if (omp_get_device_num () == device_num)
+    result = GOMP_evaluate_current_device (kind, arch, isa);
+  else
+    {
+      if (!omp_is_initial_device ())
+	/* Accelerators are not expected to know about other devices.  */
+	result = false;
+      else
+	{
+	  struct gomp_device_descr *device = resolve_device (device_num, true);
+	  if (device == NULL)
+	    result = false;
+	  else if (device->evaluate_device_func)
+	    /* NOTE(review): DEVICE_NUM is the OpenMP device number; confirm
+	       the plugins expect that rather than a per-plugin index.  */
+	    result = device->evaluate_device_func (device_num, kind, arch,
+						   isa);
+	}
+    }
+
+  /* Completes the debug line started above.  */
+  gomp_debug (1, " -> %s\n", result ? "true" : "false");
+  return result;
+}
+
+
#ifdef PLUGIN_SUPPORT
/* This function tries to load a plugin for DEVICE. Name of plugin is passed
@@ -5140,6 +5177,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
DLSYM (free);
DLSYM (dev2host);
DLSYM (host2dev);
+ DLSYM (evaluate_device);
DLSYM_OPT (memcpy2d, memcpy2d);
DLSYM_OPT (memcpy3d, memcpy3d);
device->capabilities = device->get_caps_func ();
From: Kwok Cheung Yeung <kcy@codesourcery.com> This patch implements the libgomp runtime support for the dynamic target_device selector via the GOMP_evaluate_target_device function. include/ChangeLog * cuda/cuda.h (CUdevice_attribute): Add definitions for CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR and CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR. libgomp/ChangeLog * Makefile.am (libgomp_la_SOURCES): Add selector.c. * Makefile.in: Regenerate. * config/gcn/selector.c: New. * config/linux/selector.c: New. * config/linux/x86/selector.c: New. * config/nvptx/selector.c: New. * libgomp-plugin.h (GOMP_OFFLOAD_evaluate_device): New. * libgomp.h (struct gomp_device_descr): Add evaluate_device_func field. * libgomp.map (GOMP_5.1): Add GOMP_evaluate_target_device. * libgomp_g.h (GOMP_evaluate_current_device): New. (GOMP_evaluate_target_device): New. * oacc-host.c (host_evaluate_device): New. (host_dispatch): Initialize evaluate_device_func field to host_evaluate_device. * plugin/plugin-gcn.c (GOMP_OFFLOAD_evaluate_device): New. * plugin/plugin-nvptx.c (struct ptx_device): Add compute_major and compute_minor fields. (nvptx_open_device): Read compute capability information from device. (CHECK_ISA): New macro. (GOMP_OFFLOAD_evaluate_device): New. * selector.c: New. * target.c (GOMP_evaluate_target_device): New. (gomp_load_plugin_for_device): Load evaluate_device plugin function. 
--- include/cuda/cuda.h | 2 + libgomp/Makefile.am | 2 +- libgomp/Makefile.in | 5 +- libgomp/config/gcn/selector.c | 57 +++++ libgomp/config/linux/selector.c | 43 ++++ libgomp/config/linux/x86/selector.c | 325 ++++++++++++++++++++++++++++ libgomp/config/nvptx/selector.c | 65 ++++++ libgomp/libgomp-plugin.h | 2 + libgomp/libgomp.h | 1 + libgomp/libgomp.map | 1 + libgomp/libgomp_g.h | 8 + libgomp/oacc-host.c | 11 + libgomp/plugin/plugin-gcn.c | 14 ++ libgomp/plugin/plugin-nvptx.c | 45 ++++ libgomp/selector.c | 36 +++ libgomp/target.c | 38 ++++ 16 files changed, 652 insertions(+), 3 deletions(-) create mode 100644 libgomp/config/gcn/selector.c create mode 100644 libgomp/config/linux/selector.c create mode 100644 libgomp/config/linux/x86/selector.c create mode 100644 libgomp/config/nvptx/selector.c create mode 100644 libgomp/selector.c