From 3effb58391010784a14c760e50550af639fe8b51 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian@codesourcery.com>
Date: Mon, 22 Sep 2014 03:27:53 -0700
Subject: [PATCH 4/5] OpenACC tests.
xxxx-xx-xx James Norris <jnorris@codesourcery.com>
Thomas Schwinge <thomas@codesourcery.com>
Tom de Vries <tom@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
libgomp/
* testsuite/lib/libgomp.exp (libgomp-test-support.exp): Include.
(libgomp_init): Add include directory for gomp-constants.h to ALWAYS_CFLAGS.
Support build-tree and installed testing, and passing environment variables
to remote test machines.
(libgomp_target_compile): Don't set compiler=$GCC_UNDER_TEST in options.
(check_effective_target_openacc_nvidia_accel_present)
(check_effective_target_openacc_nvidia_accel_selected): New
functions.
* testsuite/libgomp.oacc-fortran/fortran.exp: New exp file.
* testsuite/libgomp.oacc-fortran/*.f: New tests.
* testsuite/libgomp.oacc-fortran/*.f90: Likewise.
* testsuite/libgomp.oacc-c/c.exp: New exp file.
* testsuite/libgomp.oacc-c/context1.c, testsuite/libgomp.oacc-c/context3.c:
New tests.
* testsuite/libgomp.oacc-c++/c++.exp: New exp file.
* testsuite/libgomp.oacc-c-c++-common/subr.ptx: New file.
* testsuite/libgomp.oacc-c-c++-common/subr.h: New file.
* testsuite/libgomp.oacc-c-c++-common/timer.h: New file.
* testsuite/libgomp.oacc-c-c++-common/*.c: New tests.
---
libgomp/testsuite/lib/libgomp.exp | 101 +++++++++-
libgomp/testsuite/libgomp.oacc-c++/c++.exp | 119 +++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-1.c | 24 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-10.c | 58 ++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-11.c | 22 ++
.../testsuite/libgomp.oacc-c-c++-common/lib-12.c | 37 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-13.c | 60 ++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-14.c | 61 ++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-15.c | 33 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-16.c | 29 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-17.c | 31 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-18.c | 34 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-19.c | 60 ++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-2.c | 26 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-20.c | 29 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-21.c | 29 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-22.c | 29 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-23.c | 39 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-24.c | 55 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-25.c | 30 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-26.c | 26 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-27.c | 26 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-28.c | 26 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-29.c | 26 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-3.c | 15 ++
.../testsuite/libgomp.oacc-c-c++-common/lib-30.c | 26 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-31.c | 27 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-32.c | 38 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-33.c | 31 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-34.c | 33 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-35.c | 26 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-36.c | 26 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-37.c | 40 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-38.c | 67 ++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-39.c | 41 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-4.c | 13 ++
.../testsuite/libgomp.oacc-c-c++-common/lib-40.c | 42 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-41.c | 43 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-42.c | 35 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-43.c | 45 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-44.c | 45 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-45.c | 50 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-46.c | 42 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-47.c | 43 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-48.c | 43 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-49.c | 48 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-5.c | 40 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-50.c | 30 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-51.c | 41 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-52.c | 28 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-53.c | 28 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-54.c | 28 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-55.c | 48 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-56.c | 33 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-57.c | 28 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-58.c | 28 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-59.c | 55 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-6.c | 39 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-60.c | 54 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-61.c | 70 +++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-62.c | 49 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-63.c | 43 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-64.c | 43 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-65.c | 43 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-66.c | 47 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-67.c | 43 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-68.c | 43 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-69.c | 124 ++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-7.c | 18 ++
.../testsuite/libgomp.oacc-c-c++-common/lib-70.c | 136 +++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-71.c | 119 +++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-72.c | 121 +++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-73.c | 134 ++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-74.c | 139 +++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-75.c | 141 +++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-76.c | 147 ++++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-77.c | 135 +++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-78.c | 140 +++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-79.c | 167 +++++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-80.c | 132 ++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-81.c | 211 +++++++++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-82.c | 144 +++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-83.c | 58 ++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-84.c | 66 ++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-85.c | 52 +++++
.../testsuite/libgomp.oacc-c-c++-common/lib-86.c | 42 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-87.c | 42 ++++
.../testsuite/libgomp.oacc-c-c++-common/lib-88.c | 111 ++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-89.c | 118 +++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-9.c | 70 +++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-90.c | 137 +++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/lib-92.c | 112 ++++++++++
libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h | 46 +++++
.../testsuite/libgomp.oacc-c-c++-common/subr.ptx | 148 ++++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/timer.h | 103 ++++++++++
libgomp/testsuite/libgomp.oacc-c/c.exp | 86 ++++++++
libgomp/testsuite/libgomp.oacc-c/context-1.c | 213 ++++++++++++++++++++
libgomp/testsuite/libgomp.oacc-c/context-3.c | 200 ++++++++++++++++++
libgomp/testsuite/libgomp.oacc-fortran/fortran.exp | 115 +++++++++++
libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 | 13 ++
libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 | 82 ++++++++
libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 | 82 ++++++++
libgomp/testsuite/libgomp.oacc-fortran/lib-2.f | 13 ++
libgomp/testsuite/libgomp.oacc-fortran/lib-3.f | 13 ++
libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 | 35 ++++
libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 | 31 +++
libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 | 35 ++++
libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 | 31 +++
libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 | 83 ++++++++
libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 | 83 ++++++++
.../libgomp.oacc-fortran/openacc_version-1.f | 9 +
.../libgomp.oacc-fortran/openacc_version-2.f90 | 9 +
112 files changed, 6934 insertions(+), 2 deletions(-)
create mode 100644 libgomp/testsuite/libgomp.oacc-c++/c++.exp
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-1.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-10.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-12.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-19.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-2.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-26.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-27.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-3.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-31.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-32.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-33.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-35.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-36.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-37.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-39.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-4.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-40.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-41.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-45.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-46.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-49.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-51.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-55.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-56.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-59.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-60.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-61.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-66.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-7.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-9.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h
create mode 100644 libgomp/testsuite/libgomp.oacc-c/c.exp
create mode 100644 libgomp/testsuite/libgomp.oacc-c/context-1.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c/context-3.c
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-2.f
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-3.f
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90
@@ -31,6 +31,9 @@ load_gcc_lib timeout-dg.exp
load_gcc_lib torture-options.exp
load_gcc_lib fortran-modules.exp
+# Try to load a test support file, built during libgomp configuration.
+load_file libgomp-test-support.exp
+
set dg-do-what-default run
#
@@ -139,9 +142,29 @@ proc libgomp_init { args } {
lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/.libs"
lappend ALWAYS_CFLAGS "additional_flags=-I${blddir}"
lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/.libs"
+ # The top-level include directory, for libgomp-constants.h.
+ lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include"
}
lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/.."
+ # For build-tree testing, also consider the CUDA paths used for builing.
+ # For installed testing, we assume all that to be provided in the sysroot.
+ if { $blddir != "" } {
+ global cuda_driver_include
+ global cuda_driver_lib
+ if { $cuda_driver_include != "" } {
+ # Stop gfortran from freaking out:
+ # Warning: Nonexistent include directory "[...]"
+ if {[file exists $cuda_driver_include]} {
+ lappend ALWAYS_CFLAGS "additional_flags=-I$cuda_driver_include"
+ }
+ }
+ if { $cuda_driver_lib != "" } {
+ lappend ALWAYS_CFLAGS "additional_flags=-L$cuda_driver_lib"
+ append always_ld_library_path ":$cuda_driver_lib"
+ }
+ }
+
# We use atomic operations in the testcases to validate results.
if { ([istarget i?86-*-*] || [istarget x86_64-*-*])
&& [check_effective_target_ia32] } {
@@ -169,6 +192,57 @@ proc libgomp_init { args } {
# Disable color diagnostics
lappend ALWAYS_CFLAGS "additional_flags=-fdiagnostics-color=never"
+
+ # TODO. Evil hack. DejaGnu doesn't have a mechanism for setting
+ # environment variables on remote boards. Thus, we have to fake it, using
+ # GCC's constructor attributes to create object files that install the
+ # desired environment variables.
+ set e_list [list \
+ [list defaults LIBGOMP_PLUGIN_PATH=$::env(LIBGOMP_PLUGIN_PATH)] \
+ [list ACC_DEVICE_TYPE-host ACC_DEVICE_TYPE=host ] \
+ [list ACC_DEVICE_TYPE-host_nonshm ACC_DEVICE_TYPE=host_nonshm ] \
+ [list ACC_DEVICE_TYPE-nvidia ACC_DEVICE_TYPE=nvidia ] ]
+ foreach e $e_list {
+ set v [lindex $e 0]
+ set s [lindex $e 1]
+ verbose "creating constructor-setenv: $v: $s"
+ set src constructor-setenv-$v.c
+ set obj constructor-setenv-$v.o
+ set f_src [open $src "w"]
+ puts $f_src "static void __attribute__((constructor(1000)))"
+ puts $f_src "init_env(void) {"
+ puts $f_src " int putenv(char *);"
+ puts $f_src " putenv(\"$s\");"
+ puts $f_src "}"
+ if { $v == "defaults" } {
+ # TODO. We want libgomp to initialize after the putenv calls.
+ # But: shared libraries' constructors (and thus
+ # env.c:initialize_env) will be called before the executable's
+ # (init_env functions created above), so it will already have been
+ # initialized (and has to be, in case we're not linking in this
+ # gunk). Assuming no execution of other libgomp functionality in
+ # between (which we're not doing during initialization),
+ # initialize_env's effects are idempotent when calling it again, so
+ # we'll do that now, after the putenv calls have been executed.
+ puts $f_src "static void __attribute__((constructor(1001)))"
+ puts $f_src "init_libgomp(void) {"
+ # Some test cases specify -fno-openmp, so libgomp isn't linked in.
+ puts $f_src " void initialize_env(void) __attribute__((weak));"
+ puts $f_src " if (initialize_env)"
+ puts $f_src " initialize_env();"
+ puts $f_src "}"
+ }
+ close $f_src
+ # TODO. Using whichever compiler is currently configured... At least
+ # switch it into C mode.
+ set lines [libgomp_target_compile $src $obj object "additional_flags=-xc"]
+ # TODO. Error checking.
+ file delete $src
+ }
+ # When adding constructor-setenv-*.o files, make sure to cancel any -x flag
+ # that may have been set before.
+ lappend ALWAYS_CFLAGS "ldflags=-x none"
+ lappend ALWAYS_CFLAGS "ldflags=constructor-setenv-defaults.o"
}
#
@@ -180,7 +254,6 @@ proc libgomp_target_compile { source dest type options } {
global libgomp_compile_options
global gluefile wrap_flags
global ALWAYS_CFLAGS
- global GCC_UNDER_TEST
global lang_test_file
global lang_library_path
global lang_link_flags
@@ -208,7 +281,6 @@ proc libgomp_target_compile { source dest type options } {
lappend options "additional_flags=[libio_include_flags]"
lappend options "timeout=[timeout_value]"
- lappend options "compiler=$GCC_UNDER_TEST"
set options [concat $libgomp_compile_options $options]
@@ -253,3 +325,28 @@ proc check_effective_target_offload_device { } {
}
} ]
}
+
+# Return 1 if at least one nvidia board is present.
+
+proc check_effective_target_openacc_nvidia_accel_present { } {
+ return [check_runtime openacc_nvidia_accel_present {
+ #include <openacc.h>
+ int main () {
+ return !(acc_get_num_devices (acc_device_nvidia) > 0);
+ }
+ } "" ]
+}
+
+# Return 1 if at least one nvidia board is present, and the nvidia device type
+# is selected by default.
+
+proc check_effective_target_openacc_nvidia_accel_selected { } {
+ if { ![check_effective_target_openacc_nvidia_accel_present] } {
+ return 0;
+ }
+ global accel
+ if { $accel == "nvidia" } {
+ return 1;
+ }
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,119 @@
+# This whole file adapted from libgomp.c++/c++.exp.
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+
+global shlib_ext
+
+set shlib_ext [get_shlib_extension]
+#TODO
+set lang_link_flags "-lstdc++"
+set lang_test_file_found 0
+set lang_library_path "../libstdc++-v3/src/.libs"
+if [info exists lang_include_flags] then {
+ unset lang_include_flags
+}
+
+proc check_effective_target_oacc_c { } {
+ return 0
+}
+
+# Initialize dg.
+dg-init
+
+# Turn on OpenACC.
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
+
+set blddir [lookfor_file [get_multilibs] libgomp]
+
+if { $blddir != "" } {
+ # Look for a static libstdc++ first.
+ if [file exists "${blddir}/${lang_library_path}/libstdc++.a"] {
+ set lang_test_file "${lang_library_path}/libstdc++.a"
+ set lang_test_file_found 1
+ # We may have a shared only build, so look for a shared libstdc++.
+ } elseif [file exists "${blddir}/${lang_library_path}/libstdc++.${shlib_ext}"] {
+ set lang_test_file "${lang_library_path}/libstdc++.${shlib_ext}"
+ set lang_test_file_found 1
+ } else {
+ puts "No libstdc++ library found, will not execute c++ tests"
+ }
+} elseif { [info exists GXX_UNDER_TEST] } {
+ set lang_test_file_found 1
+ # Needs to exist for libgomp.exp.
+ set lang_test_file ""
+} else {
+ puts "GXX_UNDER_TEST not defined, will not execute c++ tests"
+}
+
+if { $lang_test_file_found } {
+ if ![info exists GXX_UNDER_TEST] then {
+ # Use GCC_UNDER_TEST, but switch into C++ mode, as otherwise the
+ # c-c++-common *.c files would be compiled in C mode.
+ set GXX_UNDER_TEST "$GCC_UNDER_TEST -x c++"
+ }
+ lappend libgomp_compile_options "compiler=$GXX_UNDER_TEST"
+
+ if { $blddir != "" } {
+ set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}"
+ } else {
+ set ld_library_path "$always_ld_library_path"
+ }
+ append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+ set_ld_library_path_env_vars
+
+ set flags_file "${blddir}/../libstdc++-v3/scripts/testsuite_flags"
+ if { [file exists $flags_file] } {
+ set libstdcxx_includes [exec sh $flags_file --build-includes]
+ } else {
+ set libstdcxx_includes ""
+ }
+
+ # Todo: get list of accelerators from configure options --enable-accelerator.
+ set accels { "nvidia" "host_nonshm" }
+
+ # Run on host (or fallback) accelerator.
+ lappend accels "host"
+
+ # Test OpenACC with available accelerators.
+ set SAVE_ALWAYS_CFLAGS "$ALWAYS_CFLAGS"
+ foreach accel $accels {
+ set ALWAYS_CFLAGS "$SAVE_ALWAYS_CFLAGS"
+ set tagopt "-DACC_DEVICE_TYPE_$accel=1"
+ # Set $ACC_DEVICE_TYPE. See the comments in
+ # ../lib/libgomp.exp:libgomp_init.
+ lappend ALWAYS_CFLAGS "ldflags=constructor-setenv-ACC_DEVICE_TYPE-$accel.o"
+
+ # Todo: Determine shared memory or not using run-time test.
+ switch $accel {
+ host {
+ set acc_mem_shared 1
+ }
+ host_nonshm {
+ set acc_mem_shared 0
+ }
+ nvidia {
+ # Copy ptx file (TEMPORARY)
+ remote_download host $srcdir/libgomp.oacc-c-c++-common/subr.ptx
+
+ # Where timer.h lives
+ lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/libgomp.oacc-c-c++-common"
+ set acc_mem_shared 0
+ }
+ default {
+ set acc_mem_shared 0
+ }
+ }
+ set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+ set tests [lsort [find $srcdir/$subdir *.C]]
+ dg-runtest $tests "$tagopt" $libstdcxx_includes
+
+ set tests [lsort [find $srcdir/$subdir/../libgomp.oacc-c-c++-common *.c]]
+ dg-runtest $tests "$tagopt" $libstdcxx_includes
+ }
+}
+
+# All done.
+dg-finish
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+
+ if (acc_get_num_devices (devtype) == 0)
+ return 0;
+#endif
+
+ acc_init (devtype);
+
+ acc_init (devtype);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: device already active" } */
new file mode 100644
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ void *d;
+ acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+#endif
+
+ acc_init (devtype);
+
+ d = acc_malloc (0);
+ if (d != NULL)
+ abort ();
+
+ acc_free (0);
+
+ acc_shutdown (devtype);
+
+ acc_set_device_type (devtype);
+
+ d = acc_malloc (0);
+ if (d != NULL)
+ abort ();
+
+ acc_shutdown (devtype);
+
+ acc_init (devtype);
+
+ d = acc_malloc (1024);
+ if (d == NULL)
+ abort ();
+
+ acc_free (d);
+
+ acc_shutdown (devtype);
+
+ acc_set_device_type (devtype);
+
+ d = acc_malloc (1024);
+ if (d == NULL)
+ abort ();
+
+ acc_free (d);
+
+ acc_shutdown (devtype);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <stdint.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 512;
+ void *d;
+
+ d = acc_malloc (N);
+ if (d == NULL)
+ abort ();
+
+ acc_free ((void *)((uintptr_t) d + (uintptr_t) (N >> 1)));
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: mem free failed 1" } */
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ memset (h, 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h, N + 1) != 0)
+ abort ();
+
+ if (acc_is_present (h + 1, N) != 0)
+ abort ();
+
+ if (acc_is_present (h - 1, N) != 0)
+ abort ();
+
+ if (acc_is_present (h - 1, N - 1) != 0)
+ abort ();
+
+ if (acc_is_present (h + N, 0) != 0)
+ abort ();
+
+ if (acc_is_present (h + N, N) != 0)
+ abort ();
+
+ if (acc_is_present (0, N) != 0)
+ abort ();
+
+ if (acc_is_present (h, 0) != 0)
+ abort ();
+
+ acc_free (d);
+
+ if (acc_is_present (h, 1) != 0)
+ abort ();
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h + N - 1, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h - 1, 1) != 0)
+ abort ();
+
+ if (acc_is_present (h + N, 1) != 0)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, N - i) != 1)
+ abort ();
+ }
+
+ acc_free (d);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, N - i) != 0)
+ abort ();
+ }
+
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ (void) acc_copyin (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] already mapped to \[\h+,\+256\]" } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, N);
+
+ acc_copyout (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+
+ acc_free (d);
+
+ acc_copyout (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h[N];
+
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ unsigned char *p;
+
+ h[i] = (unsigned char *) malloc (N);
+ p = h[i];
+
+ for (j = 0; j < N; j++)
+ {
+ p[j] = i;
+ }
+
+ (void) acc_copyin (p, N);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ memset (h[i], 0, i);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ unsigned char *p;
+
+ acc_copyout (h[i], N);
+
+ p = h[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ free (h[i]);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+#endif
+
+ acc_init (devtype);
+
+ acc_shutdown (devtype);
+
+ acc_shutdown (devtype);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: no device initialized" } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, N + 1);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surounds2 \[\h+,\+257\]" } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, 0);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h + 1, N - 1);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+255\]" } */
new file mode 100644
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h1, *h2;
+
+ h1 = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h1[i] = 0xab;
+ }
+
+ (void) acc_copyin (h1, N);
+
+ h2 = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h2[i] = 0xde;
+ }
+
+ (void) acc_copyin (h2, N);
+
+ acc_copyout (h1, N + N);
+
+ free (h1);
+ free (h2);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+512\]" } */
new file mode 100644
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] already mapped to \[\h+,256\]" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, 0);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (0, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\)\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (0, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, 0);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_init (acc_device_host);
+
+ acc_shutdown (acc_device_not_host);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: device 4(4) is initialized" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N - 2);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+254\]" } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_present_or_create (h, N);
+ if (!d)
+ abort ();
+
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d1, *d2;
+
+ h = (unsigned char *) malloc (N);
+
+ d1 = acc_present_or_create (h, N);
+ if (!d1)
+ abort ();
+
+ d2 = acc_present_or_create (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ d2 = acc_pcreate (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d1, *d2;
+
+ h = (unsigned char *) malloc (N);
+
+ d1 = acc_present_or_create (h, N);
+ if (!d1)
+ abort ();
+
+ d2 = acc_present_or_create (h, N - 2);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d1, *d2;
+
+ h = (unsigned char *) malloc (N);
+
+ d1 = acc_present_or_create (h, N);
+ if (!d1)
+ abort ();
+
+ d2 = acc_present_or_create (h + 2, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] not mapped" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_present_or_create (0, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),+256\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_present_or_create (h, 0);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_present_or_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d1, *d2;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d1 = acc_present_or_copyin (h, N);
+ if (!d1)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ d2 = acc_present_or_copyin (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ d2 = acc_pcopyin (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_present_or_copyin (0, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),+256\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_init ((acc_device_t) 99);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: device 99 is out of range" } */
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_present_or_copyin (h, 0);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (h, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ acc_update_device (h, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (h, 0);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (h, N - 2);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N - 2; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ for (i = N - 2; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (0, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (h, 0);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (h, N - 2);
+
+ for (i = 0; i < N - 2; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ for (i = N - 2; i < N; i++)
+ {
+ if (h[i] != 0)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ acc_init (acc_device_default);
+
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ acc_shutdown (acc_device_default);
+
+ if (acc_get_num_devices (acc_device_nvidia) != 0)
+ {
+ acc_init (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ acc_init (acc_device_default);
+
+ acc_set_device_type (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+ }
+
+ return 0;
+
+}
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ if (acc_is_present (h, N) != 1)
+ abort ();
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h[N];
+ void *d[N];
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = (unsigned char *) malloc (N);
+ d[i] = acc_malloc (N);
+
+ acc_map_data (h[i], d[i], N);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h[i], N) != 1)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ acc_unmap_data (h[i]);
+
+ if (acc_is_present (h[i], N) != 0)
+ abort ();
+
+ acc_free (d[i]);
+ free (h[i]);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (0, d, N);
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[(nil),+256\]->\[\h+,\+256\] is a bad map" } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, 0, N);
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\]->\[(nil),\+256\] is a bad map" } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, 0);
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\]->\[\h+,\+0\] is a bad map" } */
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <stdint.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ int i;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ acc_map_data ((void *)((uintptr_t) h + (uintptr_t) i),
+ (void *)((uintptr_t) d + (uintptr_t) i), 1);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + 1, 1) != 1)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ acc_unmap_data (h + i);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + 1, 1) != 0)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N >> 1);
+
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h + (N >> 1), 1) != 0)
+ abort ();
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ acc_unmap_data (d);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \h+ is not a mapped block" } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ acc_unmap_data (0);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \(nil\) is not a mapped block" } */
new file mode 100644
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <stdint.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_hostptr ((void *)((uintptr_t) d + (uintptr_t) i)) !=
+ (void *)((uintptr_t) h + (uintptr_t) i))
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_deviceptr ((void *)((uintptr_t) h + (uintptr_t) i)) !=
+ (void *)((uintptr_t) d + (uintptr_t) i))
+ abort ();
+ }
+
+ acc_unmap_data (h);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_hostptr ((void *)((uintptr_t) d + (uintptr_t) i)) != 0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_deviceptr (h + i) != 0)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ int devnum;
+
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ acc_set_device_type (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ acc_set_device_type (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ devnum = acc_get_num_devices (acc_device_host);
+ if (devnum != 1)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,70 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h[N];
+ void *d[N];
+
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ unsigned char *p;
+
+ h[i] = (unsigned char *) malloc (N);
+
+ p = h[i];
+
+ for (j = 0; j < N; j++)
+ {
+ p[j] = i;
+ }
+
+ d[i] = acc_malloc (N);
+
+ acc_memcpy_to_device (d[i], h[i], N);
+
+ for (j = 0; j < N; j++)
+ {
+ if (acc_is_present (h[i] + j, 1) != 0)
+ abort ();
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ unsigned char *p;
+
+ memset (h[i], 0, N);
+
+ acc_memcpy_from_device (h[i], d[i], N);
+
+ p = h[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+
+ for (j = 0; j < N; j++)
+ {
+ if (acc_is_present (h[i] + j, 1) != 0)
+ abort ();
+ }
+
+ acc_free (d[i]);
+
+ free (h[i]);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ acc_init (acc_device_nvidia);
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, h, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_to_device (d, h, N << 1);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid size" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (0, h, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid device address" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, 0, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host address" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, d, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host or device address" } */
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ acc_init (acc_device_nvidia);
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, h, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_to_device (d, h, 0);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, h, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_from_device (0, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host address" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, h, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_from_device (h, 0, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid device address" } */
new file mode 100644
@@ -0,0 +1,124 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ if (acc_async_test (0) != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+
+ sleep (1);
+
+ if (acc_async_test (0) != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ if (acc_get_num_devices (acc_device_none) != 0)
+ abort ();
+
+ if (acc_get_num_devices (acc_device_host) == 0)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,136 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ const int N = 10;
+ int i;
+ CUstream streams[N];
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ if (acc_async_test (i) != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+ }
+
+ sleep ((int) (dtime / 1000.0f) + 1);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_async_test (i) != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,119 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (0, stream);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ if (acc_async_test (1) != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+
+ sleep ((int) (dtime / 1000.0f) + 1);
+
+ if (acc_async_test (1) != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: unknown async \d" } */
new file mode 100644
@@ -0,0 +1,121 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ if (acc_async_test_all () != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+
+ sleep ((int) (dtime / 1000.f) + 1);
+
+ if (acc_async_test_all () != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,134 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ const int N = 10;
+ int i;
+ CUstream streams[N];
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ }
+
+ if (acc_async_test_all () != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+
+ sleep ((int) (dtime / 1000.0f) + 1);
+
+ if (acc_async_test_all () != 1)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,139 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ start_timer (0);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait (0);
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ start_timer (0);
+
+ acc_wait (0);
+
+ atime = stop_timer (0);
+
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,141 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime, hitime, lotime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ start_timer (0);
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait (0);
+ }
+
+ atime = stop_timer (0);
+
+ hitime = dtime * N;
+ hitime += hitime * 0.02;
+
+ lotime = dtime * N;
+ lotime -= lotime * 0.02;
+
+ if (atime > hitime || atime < lotime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,147 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream *streams;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime, hitime, lotime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ streams = (CUstream *) malloc (N * sizeof (void *));
+
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ start_timer (0);
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait (i);
+ }
+
+ atime = stop_timer (0);
+
+ hitime = dtime * N;
+ hitime += hitime * 0.02;
+
+ lotime = dtime * N;
+ lotime -= lotime * 0.02;
+
+ if (atime > hitime || atime < lotime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (streams);
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,135 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (0, stream);
+
+ init_timers (1);
+
+ start_timer (0);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait (1);
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ start_timer (0);
+
+ acc_wait (1);
+
+ atime = stop_timer (0);
+
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: unknown async \d" } */
new file mode 100644
@@ -0,0 +1,140 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ start_timer (0);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait_all ();
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ start_timer (0);
+
+ acc_wait_all ();
+
+ atime = stop_timer (0);
+
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,167 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime, hitime, lotime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ devnum = 2;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (1, stream))
+ abort ();
+
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ start_timer (0);
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ }
+
+ acc_wait_async (0, 1);
+
+ if (acc_async_test (0) != 0)
+ abort ();
+
+ if (acc_async_test (1) != 0)
+ abort ();
+
+ acc_wait (1);
+
+ atime = stop_timer (0);
+
+ if (acc_async_test (0) != 1)
+ abort ();
+
+ if (acc_async_test (1) != 1)
+ abort ();
+
+ hitime = dtime * N;
+ hitime += hitime * 0.02;
+
+ lotime = dtime * N;
+ lotime -= lotime * 0.02;
+
+ if (atime > hitime || atime < lotime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,132 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ int N;
+ int i;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (1, stream);
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ start_timer (0);
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ }
+
+ acc_wait_async (1, 1);
+
+ acc_wait (1);
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: identical parameters" } */
new file mode 100644
@@ -0,0 +1,211 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream *streams, stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 500.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ streams = (CUstream *) malloc (N * sizeof (void *));
+
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ stream = (CUstream) acc_get_cuda_stream (N);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (N, stream))
+ abort ();
+
+ start_timer (0);
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ }
+
+ acc_wait_all_async (N);
+
+ for (i = 0; i <= N; i++)
+ {
+ if (acc_async_test (i) != 0)
+ abort ();
+ }
+
+ acc_wait (N);
+
+ for (i = 0; i <= N; i++)
+ {
+ if (acc_async_test (i) != 1)
+ abort ();
+ }
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ start_timer (0);
+
+ stream = (CUstream) acc_get_cuda_stream (N + 1);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (N + 1, stream))
+ abort ();
+
+ acc_wait_all_async (N + 1);
+
+ acc_wait (N + 1);
+
+ atime = stop_timer (0);
+
+ if (0.10 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ start_timer (0);
+
+ acc_wait_all_async (N);
+
+ acc_wait (N);
+
+ atime = stop_timer (0);
+
+ if (0.10 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (streams);
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,144 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay2;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream *streams;
+ unsigned long **a, **d_a, *tid, ticks;
+ int nbytes;
+ void *kargs[3];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay2, module, "delay2");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = sizeof (int);
+
+ ticks = (unsigned long) (200.0 * clkrate);
+
+ N = nprocs;
+
+ streams = (CUstream *) malloc (N * sizeof (void *));
+
+ a = (unsigned long **) malloc (N * sizeof (unsigned long *));
+ d_a = (unsigned long **) malloc (N * sizeof (unsigned long *));
+ tid = (unsigned long *) malloc (N * sizeof (unsigned long));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = (unsigned long *) malloc (sizeof (unsigned long));
+ *a[i] = N;
+ d_a[i] = (unsigned long *) acc_malloc (nbytes);
+ tid[i] = i;
+
+ acc_map_data (a[i], d_a[i], nbytes);
+
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ kargs[0] = (void *) &d_a[i];
+ kargs[1] = (void *) &ticks;
+ kargs[2] = (void *) &tid[i];
+
+ r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ ticks = (unsigned long) (50.0 * clkrate);
+ }
+
+ acc_wait_all_async (0);
+
+ for (i = 0; i < N; i++)
+ {
+ acc_copyout (a[i], nbytes);
+ if (*a[i] != i)
+ abort ();
+ }
+
+ free (streams);
+
+ for (i = 0; i < N; i++)
+ {
+ free (a[i]);
+ }
+
+ free (a);
+ free (d_a);
+ free (tid);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,58 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ float atime;
+ CUstream stream;
+ CUresult r;
+
+ acc_init (acc_device_nvidia);
+
+ (void) acc_get_device_num (acc_device_nvidia);
+
+ init_timers (1);
+
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ start_timer (0);
+
+ acc_wait_all_async (0);
+
+ acc_wait (0);
+
+ atime = stop_timer (0);
+
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ fini_timers ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,66 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 100;
+ int i;
+ CUstream *streams;
+ CUstream s;
+ CUresult r;
+
+ acc_init (acc_device_nvidia);
+
+ (void) acc_get_device_num (acc_device_nvidia);
+
+ streams = (CUstream *) malloc (N * sizeof (void *));
+
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ int cnt;
+
+ cnt = 0;
+
+ s = streams[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (s == streams[j])
+ cnt++;
+ }
+
+ if (cnt != 1)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,52 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <stdio.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 100;
+ int i;
+ CUstream *streams;
+ CUstream s;
+ CUresult r;
+
+ acc_init (acc_device_nvidia);
+
+ (void) acc_get_device_num (acc_device_nvidia);
+
+ streams = (CUstream *) malloc (N * sizeof (void *));
+
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ s = NULL;
+
+ if (acc_set_cuda_stream (N + 1, s) != 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ acc_init (acc_device_host);
+
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ acc_shutdown (acc_device_host);
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ acc_init (acc_device_nvidia);
+
+ if (acc_get_current_cuda_device () == 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ acc_init (acc_device_host);
+
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ acc_shutdown (acc_device_host);
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ acc_init (acc_device_nvidia);
+
+ if (acc_get_current_cuda_context () == 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,111 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char *x;
+void *d_x;
+const int N = 256;
+
+static void *
+test (void *arg)
+{
+ int i;
+
+ if (acc_get_current_cuda_context () != NULL)
+ abort ();
+
+ if (acc_is_present (x, N) != 1)
+ abort ();
+
+ memset (x, 0, N);
+
+ acc_copyout (x, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (x[i] != i)
+ abort ();
+
+ x[i] = N - i - 1;
+ }
+
+ d_x = acc_copyin (x, N);
+
+ return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+ const int nthreads = 1;
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ acc_init (acc_device_nvidia);
+
+ x = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ x[i] = i;
+ }
+
+ d_x = acc_copyin (x, N);
+
+ if (acc_is_present (x, N) != 1)
+ abort ();
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (nthreads * sizeof (pthread_t));
+
+ for (i = 0; i < nthreads; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < nthreads; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+
+ if (acc_is_present (x, N) != 1)
+ abort ();
+
+ memset (x, 0, N);
+
+ acc_copyout (x, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (x[i] != N - i - 1)
+ abort ();
+ }
+
+ if (acc_is_present (x, N) != 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,118 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 16;
+const int NTHREADS = 32;
+
+static void *
+test (void *arg)
+{
+ int i;
+ int tid;
+ unsigned char *p;
+ int devnum;
+
+ tid = (int) (long) arg;
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+ acc_set_device_num (devnum, acc_device_nvidia);
+
+ if (acc_get_current_cuda_context () == NULL)
+ abort ();
+
+ p = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ p[i] = tid;
+ }
+
+ x[tid] = p;
+
+ d_x[tid] = acc_copyin (p, N);
+
+ return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ acc_init (acc_device_nvidia);
+
+ x = (unsigned char **) malloc (NTHREADS * N);
+ d_x = (void **) malloc (NTHREADS * N);
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (acc_is_present (x[i], N) != 1)
+ abort ();
+ }
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ memset (x[i], 0, N);
+ acc_copyout (x[i], N);
+ }
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ unsigned char *p;
+ int j;
+
+ p = x[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+
+ if (acc_is_present (x[i], N) != 0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,70 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ int num_devices;
+ int devnum;
+ acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+#endif
+
+ num_devices = acc_get_num_devices (devtype);
+ if (num_devices == 0)
+ return 0;
+
+ acc_init (devtype);
+
+ for (i = 0; i < num_devices; i++)
+ {
+ acc_set_device_num (i, devtype);
+ devnum = acc_get_device_num (devtype);
+ if (devnum != i)
+ abort ();
+ }
+
+ acc_shutdown (devtype);
+
+ num_devices = acc_get_num_devices (devtype);
+ if (num_devices == 0)
+ abort ();
+
+ for (i = 0; i < num_devices; i++)
+ {
+ acc_set_device_num (i, devtype);
+ devnum = acc_get_device_num (devtype);
+ if (devnum != i)
+ abort ();
+ }
+
+ acc_shutdown (devtype);
+
+ acc_init (devtype);
+
+ acc_set_device_num (0, devtype);
+
+ devnum = acc_get_device_num (devtype);
+ if (devnum != 0)
+ abort ();
+
+ if (num_devices > 1)
+ {
+ acc_set_device_num (1, (acc_device_t) 0);
+
+ devnum = acc_get_device_num (devtype);
+ if (devnum != 0)
+ abort ();
+ }
+
+ acc_shutdown (devtype);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,137 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+#include <cuda.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 16;
+const int NTHREADS = 32;
+
+static void *
+test (void *arg)
+{
+ int i;
+ int tid;
+ unsigned char *p;
+ int devnum;
+
+ tid = (int) (long) arg;
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+ acc_set_device_num (devnum, acc_device_nvidia);
+
+ if (acc_get_current_cuda_context () == NULL)
+ abort ();
+
+ p = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ p[i] = tid;
+ }
+
+ x[tid] = p;
+
+ d_x[tid] = acc_copyin (p, N);
+
+ acc_wait_all ();
+
+ return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+ CUresult r;
+ CUstream s;
+
+ acc_init (acc_device_nvidia);
+
+ x = (unsigned char **) malloc (NTHREADS * N);
+ d_x = (void **) malloc (NTHREADS * N);
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+
+ r = cuStreamCreate (&s, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, s))
+ abort ();
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (acc_is_present (x[i], N) != 1)
+ abort ();
+ }
+
+ acc_get_cuda_stream (1);
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ memset (x[i], 0, N);
+ acc_copyout (x[i], N);
+ }
+
+ acc_wait_all ();
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ unsigned char *p;
+ int j;
+
+ p = x[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+
+ if (acc_is_present (x[i], N) != 0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,112 @@
+/* { dg-do run } */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 32;
+const int NTHREADS = 32;
+
+static void *
+test (void *arg)
+{
+ int i;
+ int tid;
+ unsigned char *p;
+ int devnum;
+
+ tid = (int) (long) arg;
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+ acc_set_device_num (devnum, acc_device_nvidia);
+
+ if (acc_get_current_cuda_context () == NULL)
+ abort ();
+
+ acc_copyout (x[tid], N);
+
+ p = x[tid];
+
+ for (i = 0; i < N; i++)
+ {
+ if (p[i] != i)
+ abort ();
+ }
+
+ return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+ unsigned char *p;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ acc_init (acc_device_nvidia);
+
+ x = (unsigned char **) malloc (NTHREADS * N);
+ d_x = (void **) malloc (NTHREADS * N);
+
+ for (i = 0; i < N; i++)
+ {
+ int j;
+
+ p = (unsigned char *) malloc (N);
+
+ x[i] = p;
+
+ for (j = 0; j < N; j++)
+ {
+ p[j] = j;
+ }
+
+ d_x[i] = acc_copyin (p, N);
+ }
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+
+ acc_get_cuda_stream (1);
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (acc_is_present (x[i], N) != 0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,46 @@
+
+#if ACC_DEVICE_TYPE_nvidia
+
+#pragma acc routine nohost
+static int clock (void)
+{
+ int thetime;
+
+ asm __volatile__ ("mov.u32 %0, %%clock;" : "=r"(thetime));
+
+ return thetime;
+}
+
+#endif
+
+void
+delay (unsigned long *d_o, unsigned long delay)
+{
+ int start, ticks;
+
+ start = clock ();
+
+ ticks = 0;
+
+ while (ticks < delay)
+ ticks = clock () - start;
+
+ return;
+}
+
+void
+delay2 (unsigned long *d_o, unsigned long delay, unsigned long tid)
+{
+ int start, ticks;
+
+ start = clock ();
+
+ ticks = 0;
+
+ while (ticks < delay)
+ ticks = clock () - start;
+
+ d_o[0] = tid;
+
+ return;
+}
new file mode 100644
@@ -0,0 +1,148 @@
+// BEGIN PREAMBLE
+ .version 3.1
+ .target sm_30
+ .address_size 64
+// END PREAMBLE
+
+// BEGIN FUNCTION DEF: clock
+.func (.param.u32 %out_retval)clock
+{
+.reg.u32 %retval;
+ .reg.u64 %hr10;
+ .reg.u32 %r22;
+ .reg.u32 %r23;
+ .reg.u32 %r24;
+ .local.align 8 .b8 %frame[8];
+ // #APP
+// 7 "subr.c" 1
+ mov.u32 %r24, %clock;
+// 0 "" 2
+ // #NO_APP
+ st.local.u32 [%frame], %r24;
+ ld.local.u32 %r22, [%frame];
+ mov.u32 %r23, %r22;
+ mov.u32 %retval, %r23;
+ st.param.u32 [%out_retval], %retval;
+ ret;
+ }
+// END FUNCTION DEF
+// BEGIN GLOBAL FUNCTION DEF: delay
+.visible .entry delay(.param.u64 %in_ar1, .param.u64 %in_ar2)
+{
+ .reg.u64 %ar1;
+ .reg.u64 %ar2;
+ .reg.u64 %hr10;
+ .reg.u64 %r22;
+ .reg.u32 %r23;
+ .reg.u64 %r24;
+ .reg.u64 %r25;
+ .reg.u32 %r26;
+ .reg.u32 %r27;
+ .reg.u32 %r28;
+ .reg.u32 %r29;
+ .reg.u32 %r30;
+ .reg.u64 %r31;
+ .reg.pred %r32;
+ .local.align 8 .b8 %frame[24];
+ ld.param.u64 %ar1, [%in_ar1];
+ ld.param.u64 %ar2, [%in_ar2];
+ mov.u64 %r24, %ar1;
+ st.u64 [%frame+8], %r24;
+ mov.u64 %r25, %ar2;
+ st.local.u64 [%frame+16], %r25;
+ {
+ .param.u32 %retval_in;
+ {
+ call (%retval_in), clock;
+ }
+ ld.param.u32 %r26, [%retval_in];
+}
+ st.local.u32 [%frame+4], %r26;
+ mov.u32 %r27, 0;
+ st.local.u32 [%frame], %r27;
+ bra $L4;
+$L5:
+ {
+ .param.u32 %retval_in;
+ {
+ call (%retval_in), clock;
+ }
+ ld.param.u32 %r28, [%retval_in];
+}
+ mov.u32 %r23, %r28;
+ ld.local.u32 %r30, [%frame+4];
+ sub.u32 %r29, %r23, %r30;
+ st.local.u32 [%frame], %r29;
+$L4:
+ ld.local.s32 %r22, [%frame];
+ ld.local.u64 %r31, [%frame+16];
+ setp.lo.u64 %r32,%r22,%r31;
+ @%r32 bra $L5;
+ ret;
+ }
+// END FUNCTION DEF
+// BEGIN GLOBAL FUNCTION DEF: delay2
+.visible .entry delay2(.param.u64 %in_ar1, .param.u64 %in_ar2, .param.u64 %in_ar3)
+{
+ .reg.u64 %ar1;
+ .reg.u64 %ar2;
+ .reg.u64 %ar3;
+ .reg.u64 %hr10;
+ .reg.u64 %r22;
+ .reg.u32 %r23;
+ .reg.u64 %r24;
+ .reg.u64 %r25;
+ .reg.u64 %r26;
+ .reg.u32 %r27;
+ .reg.u32 %r28;
+ .reg.u32 %r29;
+ .reg.u32 %r30;
+ .reg.u32 %r31;
+ .reg.u64 %r32;
+ .reg.pred %r33;
+ .reg.u64 %r34;
+ .reg.u64 %r35;
+ .local.align 8 .b8 %frame[32];
+ ld.param.u64 %ar1, [%in_ar1];
+ ld.param.u64 %ar2, [%in_ar2];
+ ld.param.u64 %ar3, [%in_ar3];
+ mov.u64 %r24, %ar1;
+ st.local.u64 [%frame+8], %r24;
+ mov.u64 %r25, %ar2;
+ st.local.u64 [%frame+16], %r25;
+ mov.u64 %r26, %ar3;
+ st.local.u64 [%frame+24], %r26;
+ {
+ .param.u32 %retval_in;
+ {
+ call (%retval_in), clock;
+ }
+ ld.param.u32 %r27, [%retval_in];
+}
+ st.local.u32 [%frame+4], %r27;
+ mov.u32 %r28, 0;
+ st.local.u32 [%frame], %r28;
+ bra $L8;
+$L9:
+ {
+ .param.u32 %retval_in;
+ {
+ call (%retval_in), clock;
+ }
+ ld.param.u32 %r29, [%retval_in];
+}
+ mov.u32 %r23, %r29;
+ ld.local.u32 %r31, [%frame+4];
+ sub.u32 %r30, %r23, %r31;
+ st.local.u32 [%frame], %r30;
+$L8:
+ ld.local.s32 %r22, [%frame];
+ ld.local.u64 %r32, [%frame+16];
+ setp.lo.u64 %r33,%r22,%r32;
+ @%r33 bra $L9;
+ ld.local.u64 %r34, [%frame+8];
+ ld.local.u64 %r35, [%frame+24];
+ st.u64 [%r34], %r35;
+ ret;
+ }
+// END FUNCTION DEF
new file mode 100644
@@ -0,0 +1,103 @@
+
+#include <stdio.h>
+#include <cuda.h>
+
+static int _Tnum_timers;
+static CUevent *_Tstart_events, *_Tstop_events;
+static CUstream _Tstream;
+
+void
+init_timers (int ntimers)
+{
+ int i;
+ CUresult r;
+
+ _Tnum_timers = ntimers;
+
+ _Tstart_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent));
+ _Tstop_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent));
+
+ r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ for (i = 0; i < _Tnum_timers; i++)
+ {
+ r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuEventCreate failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuEventCreate failed: %d\n", r);
+ abort ();
+ }
+ }
+}
+
+void
+fini_timers (void)
+{
+ int i;
+
+ for (i = 0; i < _Tnum_timers; i++)
+ {
+ cuEventDestroy (_Tstart_events[i]);
+ cuEventDestroy (_Tstop_events[i]);
+ }
+
+ cuStreamDestroy (_Tstream);
+
+ free (_Tstart_events);
+ free (_Tstop_events);
+}
+
+void
+start_timer (int timer)
+{
+ CUresult r;
+
+ r = cuEventRecord (_Tstart_events[timer], _Tstream);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuEventRecord failed: %d\n", r);
+ abort ();
+ }
+}
+
+float
+stop_timer (int timer)
+{
+ CUresult r;
+ float etime;
+
+ r = cuEventRecord (_Tstop_events[timer], _Tstream);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuEventRecord failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuEventSynchronize (_Tstop_events[timer]);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuEventSynchronize failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuEventElapsedTime failed: %d\n", r);
+ abort ();
+ }
+
+ return etime;
+}
new file mode 100644
@@ -0,0 +1,86 @@
+# This whole file adapted from libgomp.c/c.exp.
+
+if [info exists lang_library_path] then {
+ unset lang_library_path
+ unset lang_link_flags
+}
+if [info exists lang_test_file] then {
+ unset lang_test_file
+}
+if [info exists lang_include_flags] then {
+ unset lang_include_flags
+}
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+if ![info exists DEFAULT_CFLAGS] then {
+ set DEFAULT_CFLAGS "-O2"
+}
+
+proc check_effective_target_oacc_c { } {
+ return 1
+}
+
+# Initialize dg.
+dg-init
+
+# Turn on OpenACC.
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
+
+lappend libgomp_compile_options "compiler=$GCC_UNDER_TEST"
+
+set ld_library_path $always_ld_library_path
+append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+set_ld_library_path_env_vars
+
+# Todo: get list of accelerators from configure options --enable-accelerator.
+set accels { "nvidia" "host_nonshm" }
+
+# Run on host (or fallback) accelerator.
+lappend accels "host"
+
+# Test OpenACC with available accelerators.
+set SAVE_ALWAYS_CFLAGS "$ALWAYS_CFLAGS"
+foreach accel $accels {
+ set ALWAYS_CFLAGS "$SAVE_ALWAYS_CFLAGS"
+ set tagopt "-DACC_DEVICE_TYPE_$accel=1"
+ # Set $ACC_DEVICE_TYPE. See the comments in
+ # ../lib/libgomp.exp:libgomp_init.
+ lappend ALWAYS_CFLAGS "ldflags=constructor-setenv-ACC_DEVICE_TYPE-$accel.o"
+
+ # Todo: Determine shared memory or not using run-time test.
+ switch $accel {
+ host {
+ set acc_mem_shared 1
+ }
+ host_nonshm {
+ set acc_mem_shared 0
+ }
+ nvidia {
+ # Copy ptx file (TEMPORARY)
+ remote_download host $srcdir/libgomp.oacc-c-c++-common/subr.ptx
+
+ # Where timer.h lives
+ lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/libgomp.oacc-c-c++-common"
+ set acc_mem_shared 0
+ }
+ default {
+ set acc_mem_shared 0
+ }
+ }
+ set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+ # C tests.
+ dg-runtest [lsort [find $srcdir/$subdir *.c]] \
+ "$tagopt" $DEFAULT_CFLAGS
+
+ # C/C++ common tests.
+ dg-runtest [lsort [find $srcdir/$subdir/../libgomp.oacc-c-c++-common *.c]] \
+ "$tagopt" $DEFAULT_CFLAGS
+}
+
+# All done.
+dg-finish
new file mode 100644
@@ -0,0 +1,213 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy (int n, float a, float *x, float *y)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ {
+ y[i] = a * x[i] + y[i];
+ }
+}
+
+void
+context_check (CUcontext ctx1)
+{
+ CUcontext ctx2, ctx3;
+ CUresult r;
+
+ r = cuCtxGetCurrent (&ctx2);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (ctx1 != ctx2)
+ {
+ fprintf (stderr, "new context established\n");
+ exit (EXIT_FAILURE);
+ }
+
+ ctx3 = (CUcontext) acc_get_current_cuda_context ();
+
+ if (ctx1 != ctx3)
+ {
+ fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return;
+}
+
+int
+main (int argc, char **argv)
+{
+ cublasStatus_t s;
+ cudaError_t e;
+ cublasHandle_t h;
+ CUcontext pctx, ctx;
+ CUresult r;
+ int dev;
+ int i;
+ const int N = 256;
+ float *h_X, *h_Y1, *h_Y2;
+ float *d_X,*d_Y;
+ float alpha = 2.0f;
+ float error_norm;
+ float ref_norm;
+
+ /* Test 1 - cuBLAS creates, OpenACC shares. */
+
+ s = cublasCreate (&h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasCreate failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ r = cuCtxGetCurrent (&pctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ e = cudaGetDevice (&dev);
+ if (e != cudaSuccess)
+ {
+ fprintf (stderr, "cudaGetDevice failed: %d\n", e);
+ exit (EXIT_FAILURE);
+ }
+
+ acc_set_device_num (dev, acc_device_nvidia);
+
+ h_X = (float *) malloc (N * sizeof (float));
+ if (!h_X)
+ {
+ fprintf (stderr, "malloc failed: for h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y1 = (float *) malloc (N * sizeof (float));
+ if (!h_Y1)
+ {
+ fprintf (stderr, "malloc failed: for h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y2 = (float *) malloc (N * sizeof (float));
+ if (!h_Y2)
+ {
+ fprintf (stderr, "malloc failed: for h_Y2\n");
+ exit (EXIT_FAILURE);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ h_X[i] = rand () / (float) RAND_MAX;
+ h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
+ }
+
+ d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
+ if (d_X == NULL)
+ {
+ fprintf (stderr, "copyin error h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
+ if (d_Y == NULL)
+ {
+ fprintf (stderr, "copyin error h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasSaxpy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
+
+ context_check (pctx);
+
+ saxpy (N, alpha, h_X, h_Y2);
+
+ error_norm = 0;
+ ref_norm = 0;
+
+ for (i = 0; i < N; ++i)
+ {
+ float diff;
+
+ diff = h_Y1[i] - h_Y2[i];
+ error_norm += diff * diff;
+ ref_norm += h_Y2[i] * h_Y2[i];
+ }
+
+ error_norm = (float) sqrt ((double) error_norm);
+ ref_norm = (float) sqrt ((double) ref_norm);
+
+ if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+ {
+ fprintf (stderr, "math error\n");
+ exit (EXIT_FAILURE);
+ }
+
+ free (h_X);
+ free (h_Y1);
+ free (h_Y2);
+
+ acc_free (d_X);
+ acc_free (d_Y);
+
+ context_check (pctx);
+
+ s = cublasDestroy (h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasDestroy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ r = cuCtxGetCurrent (&ctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (!ctx)
+ {
+ fprintf (stderr, "Expected context\n");
+ exit (EXIT_FAILURE);
+ }
+
+ if (pctx != ctx)
+ {
+ fprintf (stderr, "Unexpected new context\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return EXIT_SUCCESS;
+}
new file mode 100644
@@ -0,0 +1,200 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy (int n, float a, float *x, float *y)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ {
+ y[i] = a * x[i] + y[i];
+ }
+}
+
+void
+context_check (CUcontext ctx1)
+{
+ CUcontext ctx2, ctx3;
+ CUresult r;
+
+ r = cuCtxGetCurrent (&ctx2);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (ctx1 != ctx2)
+ {
+ fprintf (stderr, "new context established\n");
+ exit (EXIT_FAILURE);
+ }
+
+ ctx3 = (CUcontext) acc_get_current_cuda_context ();
+
+ if (ctx1 != ctx3)
+ {
+ fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return;
+}
+
+int
+main (int argc, char **argv)
+{
+ cublasStatus_t s;
+ cublasHandle_t h;
+ CUcontext pctx;
+ CUresult r;
+ int i;
+ const int N = 256;
+ float *h_X, *h_Y1, *h_Y2;
+ float *d_X,*d_Y;
+ float alpha = 2.0f;
+ float error_norm;
+ float ref_norm;
+
+ /* Test 3 - OpenACC creates, cuBLAS shares. */
+
+ acc_set_device_num (0, acc_device_nvidia);
+
+ r = cuCtxGetCurrent (&pctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ h_X = (float *) malloc (N * sizeof (float));
+ if (h_X == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y1 = (float *) malloc (N * sizeof (float));
+ if (h_Y1 == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y2 = (float *) malloc (N * sizeof (float));
+ if (h_Y2 == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_Y2\n");
+ exit (EXIT_FAILURE);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ h_X[i] = rand () / (float) RAND_MAX;
+ h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
+ }
+
+ d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
+ if (d_X == NULL)
+ {
+ fprintf (stderr, "copyin error h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
+ if (d_Y == NULL)
+ {
+ fprintf (stderr, "copyin error h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ s = cublasCreate (&h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasCreate failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasSaxpy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
+
+ context_check (pctx);
+
+ saxpy (N, alpha, h_X, h_Y2);
+
+ error_norm = 0;
+ ref_norm = 0;
+
+ for (i = 0; i < N; ++i)
+ {
+ float diff;
+
+ diff = h_Y1[i] - h_Y2[i];
+ error_norm += diff * diff;
+ ref_norm += h_Y2[i] * h_Y2[i];
+ }
+
+ error_norm = (float) sqrt ((double) error_norm);
+ ref_norm = (float) sqrt ((double) ref_norm);
+
+ if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+ {
+ fprintf (stderr, "math error\n");
+ exit (EXIT_FAILURE);
+ }
+
+ free (h_X);
+ free (h_Y1);
+ free (h_Y2);
+
+ acc_free (d_X);
+ acc_free (d_Y);
+
+ context_check (pctx);
+
+ s = cublasDestroy (h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasDestroy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ acc_shutdown (acc_device_nvidia);
+
+ r = cuCtxGetCurrent (&pctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (pctx)
+ {
+ fprintf (stderr, "Unexpected context\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return EXIT_SUCCESS;
+}
new file mode 100644
@@ -0,0 +1,115 @@
+# This whole file adapted from libgomp.fortran/fortran.exp.
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+load_gcc_lib gfortran-dg.exp
+
+global shlib_ext
+global ALWAYS_CFLAGS
+
+set shlib_ext [get_shlib_extension]
+set lang_library_path "../libgfortran/.libs"
+#TODO
+set lang_link_flags "-lgfortran"
+if [info exists lang_include_flags] then {
+ unset lang_include_flags
+}
+set lang_test_file_found 0
+set quadmath_library_path "../libquadmath/.libs"
+
+
+# Initialize dg.
+dg-init
+
+# Turn on OpenACC.
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
+
+if { $blddir != "" } {
+ set lang_source_re {^.*\.[fF](|90|95|03|08)$}
+ set lang_include_flags "-fintrinsic-modules-path=${blddir}"
+ # Look for a static libgfortran first.
+ if [file exists "${blddir}/${lang_library_path}/libgfortran.a"] {
+ set lang_test_file "${lang_library_path}/libgfortran.a"
+ set lang_test_file_found 1
+ # We may have a shared only build, so look for a shared libgfortran.
+ } elseif [file exists "${blddir}/${lang_library_path}/libgfortran.${shlib_ext}"] {
+ set lang_test_file "${lang_library_path}/libgfortran.${shlib_ext}"
+ set lang_test_file_found 1
+ } else {
+ puts "No libgfortran library found, will not execute fortran tests"
+ }
+} elseif [info exists GFORTRAN_UNDER_TEST] {
+ set lang_test_file_found 1
+ # Needs to exist for libgomp.exp.
+ set lang_test_file ""
+} else {
+ puts "GFORTRAN_UNDER_TEST not defined, will not execute fortran tests"
+}
+
+if { $lang_test_file_found } {
+ if ![info exists GFORTRAN_UNDER_TEST] then {
+ set GFORTRAN_UNDER_TEST $GCC_UNDER_TEST
+ }
+ lappend libgomp_compile_options "compiler=$GFORTRAN_UNDER_TEST"
+
+ # Gather a list of all tests.
+ set tests [lsort [find $srcdir/$subdir *.\[fF\]{,90,95,03,08}]]
+
+ if { $blddir != "" } {
+ if { [file exists "${blddir}/${quadmath_library_path}/libquadmath.a"]
+ || [file exists "${blddir}/${quadmath_library_path}/libquadmath.${shlib_ext}"] } {
+ lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/${quadmath_library_path}/"
+ # Allow for spec subsitution.
+ lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/${quadmath_library_path}/"
+ set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}:${blddir}/${quadmath_library_path}"
+ } else {
+ set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}"
+ }
+ } else {
+ set ld_library_path "$always_ld_library_path"
+ }
+ append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+ set_ld_library_path_env_vars
+
+ # Todo: get list of accelerators from configure options --enable-accelerator.
+ set accels { "nvidia" "host_nonshm" }
+
+ # Run on host (or fallback) accelerator.
+ lappend accels "host"
+
+ # Test OpenACC with available accelerators.
+ set SAVE_ALWAYS_CFLAGS "$ALWAYS_CFLAGS"
+ foreach accel $accels {
+ set ALWAYS_CFLAGS "$SAVE_ALWAYS_CFLAGS"
+ set tagopt "-DACC_DEVICE_TYPE_$accel=1"
+ # Set $ACC_DEVICE_TYPE. See the comments in
+ # ../lib/libgomp.exp:libgomp_init.
+ lappend ALWAYS_CFLAGS "ldflags=constructor-setenv-ACC_DEVICE_TYPE-$accel.o"
+
+ # Todo: Determine shared memory or not using run-time test.
+ switch $accel {
+ host {
+ set acc_mem_shared 1
+ }
+ host_nonshm {
+ set acc_mem_shared 0
+ }
+ nvidia {
+ set acc_mem_shared 0
+ }
+ default {
+ set acc_mem_shared 0
+ }
+ }
+ set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+ # For Fortran we're doing torture testing, as Fortran has far more tests
+ # with arrays etc. that testing just -O0 or -O2 is insufficient, that is
+ # typically not the case for C/C++.
+ gfortran-dg-runtest $tests "$tagopt" ""
+ }
+}
+
+# All done.
+dg-finish
new file mode 100644
@@ -0,0 +1,13 @@
+use openacc
+
+if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+call acc_set_device_type (acc_device_host)
+if (acc_get_device_type () .ne. acc_device_host) call abort
+call acc_set_device_num (0, acc_device_host)
+if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+call acc_shutdown (acc_device_host)
+
+call acc_init (acc_device_host)
+call acc_shutdown (acc_device_host)
+
+end
new file mode 100644
@@ -0,0 +1,82 @@
+! { dg-do run }
+
+program main
+ implicit none
+ include "openacc_lib.h"
+
+ integer, target :: a_3d_i(10, 10, 10)
+ complex a_3d_c(10, 10, 10)
+ real a_3d_r(10, 10, 10)
+
+ integer i, j, k
+ complex c
+ real r
+ integer, parameter :: i_size = sizeof (i)
+ integer, parameter :: c_size = sizeof (c)
+ integer, parameter :: r_size = sizeof (r)
+
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+ call acc_copyin (a_3d_i)
+ call acc_copyin (a_3d_c)
+ call acc_copyin (a_3d_r)
+
+ if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+ do i = 1, 10
+ do j = 1, 10
+ do k = 1, 10
+ if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r(i, j, k), i_size) .neqv. .TRUE.) call abort
+ end do
+ end do
+ end do
+
+ call acc_shutdown (acc_device_nvidia)
+
+contains
+
+ subroutine set3d (clear, a_i, a_c, a_r)
+ logical clear
+ integer, dimension (:,:,:), intent (inout) :: a_i
+ complex, dimension (:,:,:), intent (inout) :: a_c
+ real, dimension (:,:,:), intent (inout) :: a_r
+
+ integer i, j, k
+ integer lb1, ub1, lb2, ub2, lb3, ub3
+
+ lb1 = lbound (a_i, 1)
+ ub1 = ubound (a_i, 1)
+
+ lb2 = lbound (a_i, 2)
+ ub2 = ubound (a_i, 2)
+
+ lb3 = lbound (a_i, 3)
+ ub3 = ubound (a_i, 3)
+
+ do i = lb1, ub1
+ do j = lb2, ub2
+ do k = lb3, ub3
+ if (clear) then
+ a_i(i, j, k) = 0
+ a_c(i, j, k) = cmplx (0.0, 0.0)
+ a_r(i, j, k) = 0.0
+ else
+ a_i(i, j, k) = i
+ a_c(i, j, k) = cmplx (i, j)
+ a_r(i, j, k) = i
+ end if
+ end do
+ end do
+ end do
+
+ end subroutine
+
+end program
new file mode 100644
@@ -0,0 +1,82 @@
+! { dg-do run }
+
+program main
+ implicit none
+ include "openacc_lib.h"
+
+ integer, target :: a_3d_i(10, 10, 10)
+ complex a_3d_c(10, 10, 10)
+ real a_3d_r(10, 10, 10)
+
+ integer i, j, k
+ complex c
+ real r
+ integer, parameter :: i_size = sizeof (i)
+ integer, parameter :: c_size = sizeof (c)
+ integer, parameter :: r_size = sizeof (r)
+
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+ call acc_copyin (a_3d_i)
+ call acc_copyin (a_3d_c)
+ call acc_copyin (a_3d_r)
+
+ if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+ do i = 1, 10
+ do j = 1, 10
+ do k = 1, 10
+ if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r(i, j, k), i_size) .neqv. .TRUE.) call abort
+ end do
+ end do
+ end do
+
+ call acc_shutdown (acc_device_nvidia)
+
+contains
+
+ subroutine set3d (clear, a_i, a_c, a_r)
+ logical clear
+ integer, dimension (:,:,:), intent (inout) :: a_i
+ complex, dimension (:,:,:), intent (inout) :: a_c
+ real, dimension (:,:,:), intent (inout) :: a_r
+
+ integer i, j, k
+ integer lb1, ub1, lb2, ub2, lb3, ub3
+
+ lb1 = lbound (a_i, 1)
+ ub1 = ubound (a_i, 1)
+
+ lb2 = lbound (a_i, 2)
+ ub2 = ubound (a_i, 2)
+
+ lb3 = lbound (a_i, 3)
+ ub3 = ubound (a_i, 3)
+
+ do i = lb1, ub1
+ do j = lb2, ub2
+ do k = lb3, ub3
+ if (clear) then
+ a_i(i, j, k) = 0
+ a_c(i, j, k) = cmplx (0.0, 0.0)
+ a_r(i, j, k) = 0.0
+ else
+ a_i(i, j, k) = i
+ a_c(i, j, k) = cmplx (i, j)
+ a_r(i, j, k) = i
+ end if
+ end do
+ end do
+ end do
+
+ end subroutine
+
+end program
new file mode 100644
@@ -0,0 +1,13 @@
+ USE OPENACC
+
+ IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT
+ CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT
+ CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+ CALL ACC_INIT (ACC_DEVICE_HOST)
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+ END
new file mode 100644
@@ -0,0 +1,13 @@
+ INCLUDE "openacc_lib.h"
+
+ IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT
+ CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT
+ CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+ CALL ACC_INIT (ACC_DEVICE_HOST)
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+ END
new file mode 100644
@@ -0,0 +1,35 @@
+! { dg-do run }
+
+program main
+ use openacc
+ implicit none
+
+ integer n
+
+ if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+
+ if (acc_get_num_devices (acc_device_none) .ne. 0) call abort
+
+ call acc_init (acc_device_host)
+
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ call acc_set_device_type (acc_device_host)
+
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_host)
+
+ if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+
+ if (.NOT. acc_async_test (n) ) call abort
+
+ call acc_wait (n)
+
+ call acc_wait_all ()
+
+ call acc_shutdown (acc_device_host)
+
+end program
new file mode 100644
@@ -0,0 +1,31 @@
+! { dg-do run }
+
+program main
+ use openacc
+ implicit none
+
+ integer n
+
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort
+
+ if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then
+
+ n = 1
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 1) call abort
+
+ end if
+
+ call acc_shutdown (acc_device_nvidia)
+
+end program
new file mode 100644
@@ -0,0 +1,35 @@
+! { dg-do run }
+
+program main
+ implicit none
+ include "openacc_lib.h"
+
+ integer n
+
+ if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+
+ if (acc_get_num_devices (acc_device_none) .ne. 0) call abort
+
+ call acc_init (acc_device_host)
+
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ call acc_set_device_type (acc_device_host)
+
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_host)
+
+ if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+
+ if (.NOT. acc_async_test (n) ) call abort
+
+ call acc_wait (n)
+
+ call acc_wait_all ()
+
+ call acc_shutdown (acc_device_host)
+
+end program
new file mode 100644
@@ -0,0 +1,31 @@
+! { dg-do run }
+
+program main
+ implicit none
+ include "openacc_lib.h"
+
+ integer n
+
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort
+
+ if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then
+
+ n = 1
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 1) call abort
+
+ end if
+
+ call acc_shutdown (acc_device_nvidia)
+
+end program
new file mode 100644
@@ -0,0 +1,83 @@
+! { dg-do run }
+
+program main
+ use openacc
+ use iso_c_binding
+ implicit none
+
+ integer, target :: a_3d_i(10, 10, 10)
+ complex a_3d_c(10, 10, 10)
+ real a_3d_r(10, 10, 10)
+
+ integer i, j, k
+ complex c
+ real r
+ integer, parameter :: i_size = sizeof (i)
+ integer, parameter :: c_size = sizeof (c)
+ integer, parameter :: r_size = sizeof (r)
+
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+ call acc_copyin (a_3d_i)
+ call acc_copyin (a_3d_c)
+ call acc_copyin (a_3d_r)
+
+ if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+ do i = 1, 10
+ do j = 1, 10
+ do k = 1, 10
+ if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r(i, j, k), i_size) .neqv. .TRUE.) call abort
+ end do
+ end do
+ end do
+
+ call acc_shutdown (acc_device_nvidia)
+
+contains
+
+ subroutine set3d (clear, a_i, a_c, a_r)
+ logical clear
+ integer, dimension (:,:,:), intent (inout) :: a_i
+ complex, dimension (:,:,:), intent (inout) :: a_c
+ real, dimension (:,:,:), intent (inout) :: a_r
+
+ integer i, j, k
+ integer lb1, ub1, lb2, ub2, lb3, ub3
+
+ lb1 = lbound (a_i, 1)
+ ub1 = ubound (a_i, 1)
+
+ lb2 = lbound (a_i, 2)
+ ub2 = ubound (a_i, 2)
+
+ lb3 = lbound (a_i, 3)
+ ub3 = ubound (a_i, 3)
+
+ do i = lb1, ub1
+ do j = lb2, ub2
+ do k = lb3, ub3
+ if (clear) then
+ a_i(i, j, k) = 0
+ a_c(i, j, k) = cmplx (0.0, 0.0)
+ a_r(i, j, k) = 0.0
+ else
+ a_i(i, j, k) = i
+ a_c(i, j, k) = cmplx (i, j)
+ a_r(i, j, k) = i
+ end if
+ end do
+ end do
+ end do
+
+ end subroutine
+
+end program
new file mode 100644
@@ -0,0 +1,83 @@
+! { dg-do run }
+
+program main
+ use openacc
+ use iso_c_binding
+ implicit none
+
+ integer, target :: a_3d_i(10, 10, 10)
+ complex a_3d_c(10, 10, 10)
+ real a_3d_r(10, 10, 10)
+
+ integer i, j, k
+ complex c
+ real r
+ integer, parameter :: i_size = sizeof (i)
+ integer, parameter :: c_size = sizeof (c)
+ integer, parameter :: r_size = sizeof (r)
+
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+ call acc_copyin (a_3d_i)
+ call acc_copyin (a_3d_c)
+ call acc_copyin (a_3d_r)
+
+ if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+ do i = 1, 10
+ do j = 1, 10
+ do k = 1, 10
+ if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r(i, j, k), i_size) .neqv. .TRUE.) call abort
+ end do
+ end do
+ end do
+
+ call acc_shutdown (acc_device_nvidia)
+
+contains
+
+ subroutine set3d (clear, a_i, a_c, a_r)
+ logical clear
+ integer, dimension (:,:,:), intent (inout) :: a_i
+ complex, dimension (:,:,:), intent (inout) :: a_c
+ real, dimension (:,:,:), intent (inout) :: a_r
+
+ integer i, j, k
+ integer lb1, ub1, lb2, ub2, lb3, ub3
+
+ lb1 = lbound (a_i, 1)
+ ub1 = ubound (a_i, 1)
+
+ lb2 = lbound (a_i, 2)
+ ub2 = ubound (a_i, 2)
+
+ lb3 = lbound (a_i, 3)
+ ub3 = ubound (a_i, 3)
+
+ do i = lb1, ub1
+ do j = lb2, ub2
+ do k = lb3, ub3
+ if (clear) then
+ a_i(i, j, k) = 0
+ a_c(i, j, k) = cmplx (0.0, 0.0)
+ a_r(i, j, k) = 0.0
+ else
+ a_i(i, j, k) = i
+ a_c(i, j, k) = cmplx (i, j)
+ a_r(i, j, k) = i
+ end if
+ end do
+ end do
+ end do
+
+ end subroutine
+
+end program
new file mode 100644
@@ -0,0 +1,9 @@
+! { dg-do run }
+
+ program main
+ implicit none
+ include "openacc_lib.h"
+
+ if (openacc_version .ne. 201306) call abort;
+
+ end program main
new file mode 100644
@@ -0,0 +1,9 @@
+! { dg-do run }
+
+program main
+ use openacc
+ implicit none
+
+ if (openacc_version .ne. 201306) call abort;
+
+end program main
--
1.7.10.4