config/nvptx: Handle downward compat for OpenMP context selector
Nvptx's compute capabilities (SM_XX) are downward compatible, i.e. SM_80
supports all features of SM_30, SM_70 etc. Additionally, GCC's -march=
currently only supports those values that actually change the generated
code - and offers -march=... to map higher values to the next lower
supported version.
Update libgomp.texi to document the downward compatibility and case
sensitivity of the context selectors.
gcc/ChangeLog:
* config/nvptx/nvptx-sm.def (NVPTX_SM_COMPAT): Add compute
capabilities supported by -march-map= lower than sm_80 (= highest
supported -march=).
* config/nvptx/gen-omp-device-properties.sh: Hande it.
* config/nvptx/gen-h.sh: Ignore it.
* config/nvptx/gen-multilib-matches.sh: Likewise.
* config/nvptx/gen-opt.sh: Likewise.
* config/nvptx/nvptx.cc (sm_version_to_number): New.
(nvptx_omp_device_kind_arch_isa): Match when requested ISA (sm_XX)
version is lower than actual ISA version.
libgomp/ChangeLog:
* libgomp.texi (OpenMP Context Selectors): Add note about case
sensitivity and downward compatibility.
* testsuite/libgomp.c/declare-variant-3.h: Extend to check for
downward compatibility.
* testsuite/libgomp.c/declare-variant-3-sm30.c: Update.
* testsuite/libgomp.c/declare-variant-3-sm35.c: Likewise.
* testsuite/libgomp.c/declare-variant-3-sm53.c: Likewise.
* testsuite/libgomp.c/declare-variant-3-sm70.c: Likewise.
* testsuite/libgomp.c/declare-variant-3-sm75.c: Likewise.
* testsuite/libgomp.c/declare-variant-3-sm80.c: Likewise.
* testsuite/libgomp.c/declare-variant-3.c: Likewise.
gcc/config/nvptx/gen-h.sh | 2 +-
gcc/config/nvptx/gen-multilib-matches.sh | 2 +-
gcc/config/nvptx/gen-omp-device-properties.sh | 2 +-
gcc/config/nvptx/gen-opt.sh | 2 +-
gcc/config/nvptx/nvptx-sm.def | 22 +++++++
gcc/config/nvptx/nvptx.cc | 33 ++++++++--
.../testsuite/libgomp.c/declare-variant-3-sm30.c | 3 +-
.../testsuite/libgomp.c/declare-variant-3-sm35.c | 3 +-
.../testsuite/libgomp.c/declare-variant-3-sm53.c | 3 +-
.../testsuite/libgomp.c/declare-variant-3-sm70.c | 3 +-
.../testsuite/libgomp.c/declare-variant-3-sm75.c | 3 +-
.../testsuite/libgomp.c/declare-variant-3-sm80.c | 1 +
libgomp/testsuite/libgomp.c/declare-variant-3.c | 8 ++-
libgomp/testsuite/libgomp.c/declare-variant-3.h | 75 ++++++++++++++++++++--
14 files changed, 140 insertions(+), 22 deletions(-)
@@ -21,7 +21,7 @@
nvptx_sm_def="$1/nvptx-sm.def"
gen_copyright_sh="$1/gen-copyright.sh"
-sms=$(grep ^NVPTX_SM $nvptx_sm_def | sed 's/.*(//;s/,.*//')
+sms=$(grep '^NVPTX_SM[^_]' $nvptx_sm_def | sed 's/.*(//;s/,.*//')
cat <<EOF
/* -*- buffer-read-only: t -*-
@@ -26,7 +26,7 @@ nvptx_sm_def="$1/nvptx-sm.def"
multilib_options_isa_default=$2
multilib_options_isa_list=$3
-sms=$(grep ^NVPTX_SM $nvptx_sm_def | sed 's/.*(//;s/,.*//')
+sms=$(grep '^NVPTX_SM[^_]' $nvptx_sm_def | sed 's/.*(//;s/,.*//')
# Every variant in 'sms' has to either be remapped to the default variant
# ('.', which is always built), or does get built as non-default variant
@@ -20,7 +20,7 @@
nvptx_sm_def="$1/nvptx-sm.def"
-sms=$(grep ^NVPTX_SM $nvptx_sm_def | sed 's/.*(//;s/,.*//')
+sms=$(grep '^NVPTX_SM' $nvptx_sm_def | sed 's/.*(//;s/[,)].*//')
echo kind: gpu
echo arch: nvptx nvptx64
@@ -21,7 +21,7 @@
nvptx_sm_def="$1/nvptx-sm.def"
gen_copyright_sh="$1/gen-copyright.sh"
-sms=$(grep ^NVPTX_SM $nvptx_sm_def | sed 's/.*(//;s/,.*//')
+sms=$(grep '^NVPTX_SM[^_]' $nvptx_sm_def | sed 's/.*(//;s/,.*//')
last=
for sm in $sms; do
@@ -20,11 +20,33 @@
#define NVPTX_SM_SEP
#endif
+#ifndef NVPTX_SM_COMPAT
+#define NVPTX_SM_COMPAT(sm)
+#endif
+
+/* Compute capabilities follow the onion layer model, i.e. higher versions
+ include features of the lower one, except with 'a' suffix (like: 'sm_90a').
+ This affects the OpenMP context selectors. NVPTX_SM_COMPAT is used this
+ in gen-omp-device-properties.sh and, hence, only lists compute capabilites
+ lower than the highest suppored version.
+
+ FIXME: When adding NVPTX_SM '90a', update nvptx_omp_device_kind_arch_isa
+ to handle the lack of upward compability, denoted by the 'a' suffix. */
+
NVPTX_SM (30, NVPTX_SM_SEP)
+NVPTX_SM_COMPAT (32)
NVPTX_SM (35, NVPTX_SM_SEP)
+NVPTX_SM_COMPAT (37)
+NVPTX_SM_COMPAT (50)
+NVPTX_SM_COMPAT (52)
NVPTX_SM (53, NVPTX_SM_SEP)
+NVPTX_SM_COMPAT (60)
+NVPTX_SM_COMPAT (61)
+NVPTX_SM_COMPAT (62)
NVPTX_SM (70, NVPTX_SM_SEP)
+NVPTX_SM_COMPAT (72)
NVPTX_SM (75, NVPTX_SM_SEP)
NVPTX_SM (80,)
#undef NVPTX_SM_SEP
+#undef NVPTX_SM_COMPAT
@@ -308,6 +308,21 @@ sm_version_to_string (enum ptx_isa sm)
}
}
+static unsigned
+sm_version_to_number (enum ptx_isa sm)
+{
+ switch (sm)
+ {
+#define NVPTX_SM(XX, SEP) \
+ case PTX_ISA_SM ## XX: \
+ return XX;
+#include "nvptx-sm.def"
+#undef NVPTX_SM
+ default:
+ gcc_unreachable ();
+ }
+}
+
static void
handle_ptx_version_option (void)
{
@@ -6406,14 +6421,18 @@ nvptx_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
return (strcmp (name, "nvptx") == 0
|| (TARGET_ABI64 && strcmp (name, "nvptx64") == 0));
case omp_device_isa:
-#define NVPTX_SM(XX, SEP) \
- { \
- if (strcmp (name, "sm_" #XX) == 0) \
- return ptx_isa_option == PTX_ISA_SM ## XX; \
+ {
+ if (!startswith (name, "sm_"))
+ return 0;
+ char *end = NULL;
+ long req = strtol (&name[3], &end, 10);
+ if (end == &name[3] || end[0] != '\0')
+ return 0;
+ long curr = sm_version_to_number ((enum ptx_isa) ptx_isa_option);
+ if (req <= curr)
+ return 1;
+ return 0;
}
-#include "nvptx-sm.def"
-#undef NVPTX_SM
- return 0;
default:
gcc_unreachable ();
}
@@ -5,4 +5,5 @@
#include "declare-variant-3.h"
-/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f30 \\(\\);" "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times "= f30 \\(\\);" 1 "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times "= g \\(\\);" 1 "optimized" } } */
@@ -5,4 +5,5 @@
#include "declare-variant-3.h"
-/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f35 \\(\\);" "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times "= f35 \\(\\);" 1 "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times "= g30 \\(\\);" 1 "optimized" } } */
@@ -5,4 +5,5 @@
#include "declare-variant-3.h"
-/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f53 \\(\\);" "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times "= f53 \\(\\);" 1 "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times "= g35 \\(\\);" 1 "optimized" } } */
@@ -5,4 +5,5 @@
#include "declare-variant-3.h"
-/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f70 \\(\\);" "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times "= f70 \\(\\);" 1 "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times "= g53 \\(\\);" 1 "optimized" } } */
@@ -5,4 +5,5 @@
#include "declare-variant-3.h"
-/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f75 \\(\\);" "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times "= f75 \\(\\);" 1 "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times "= g70 \\(\\);" 1 "optimized" } } */
@@ -6,3 +6,4 @@
#include "declare-variant-3.h"
/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f80 \\(\\);" "optimized" } } */
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= g70 \\(\\);" "optimized" } } */
@@ -3,6 +3,10 @@
#include "declare-variant-3.h"
-/* { dg-final { scan-tree-dump "= f \\(\\);" "optimized" } }
+/* { dg-final { scan-tree-dump-times "= f \\(\\);" 1 "optimized" } }
+ { dg-final { scan-tree-dump-times "= g \\(\\);" 1 "optimized" } }
{ dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= f \\(\\);" "optimized" { target offload_target_amdgcn } } }
- { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f\[0-9\]+ \\(\\);" "optimized" { target offload_target_nvptx } } } */
+ { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= g \\(\\);" "optimized" { target offload_target_amdgcn } } }
+ { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-not "= f \\(\\);" "optimized" { target offload_target_nvptx } } }
+ { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f\[0-9\]+ \\(\\);" "optimized" { target offload_target_nvptx } } }
+ { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= g\[0-9\]* \\(\\);" "optimized" { target offload_target_nvptx } } } */
@@ -55,23 +55,90 @@ f (void)
return 0;
}
+__attribute__ ((noipa))
+int
+g30 (void)
+{
+ return 30;
+}
+
+__attribute__ ((noipa))
+int
+g35 (void)
+{
+ return 35;
+}
+
+__attribute__ ((noipa))
+int
+g53 (void)
+{
+ return 53;
+}
+
+__attribute__ ((noipa))
+int
+g70 (void)
+{
+ return 70;
+}
+
+__attribute__ ((noipa))
+int
+g75 (void)
+{
+ return 75;
+}
+
+__attribute__ ((noipa))
+int
+g80 (void)
+{
+ return 80;
+}
+
+#pragma omp declare variant (g30) match (device={isa("sm_32")})
+#pragma omp declare variant (g35) match (device={isa("sm_52")})
+#pragma omp declare variant (g53) match (device={isa("sm_62")})
+#pragma omp declare variant (g70) match (device={isa("sm_72")})
+#pragma omp declare variant (g80) match (device={isa("sm_86")})
+__attribute__ ((noipa))
+int
+g (void)
+{
+ return 0;
+}
+
+
#pragma omp end declare target
int
main (void)
{
- int v = 0;
+ int v = -49;
+ int w = -49;
- #pragma omp target map(from:v)
- v = f ();
+ #pragma omp target map(from:v,w)
+ {
+ v = f ();
+ w = g ();
+ }
#ifdef OFFLOAD_DEVICE_NVPTX
if (v == 0)
__builtin_abort ();
+ if (v == 30)
+ {
+ if (w != 0)
+ __builtin_abort ();
+ }
+ else if (w >= v)
+ __builtin_abort ();
+
__builtin_printf ("Nvptx accelerator: sm_%d\n", v);
#else
- if (v != 0)
+ if (v != 0 || w != 0)
__builtin_abort ();
#endif