Message ID | 87h90ba8u8.fsf@euler.schwinge.homeip.net |
---|---|
State | New |
Headers | show |
Hi! Ping. On Tue, 23 May 2017 17:31:11 +0200, I wrote: > On Thu, 29 Oct 2015 17:22:46 -0700, Nathan Sidwell <nathan@acm.org> wrote: > > acc_on_device and it's builtin had a conflict. The function formally takes an > > enum argument, but the builtin takes an int -- primarily to avoid the compiler > > having to generate the enum type internally. > > > > This works fine for C, where the external declaration of the function (in > > openacc.h) matches up with the builtin, and we optimize the builtin as expected. > > > > It fails for C++ where the builtin doesn't match the declaration in the header. > > We end up with emitting a call to acc_on_device, which is resolved by > > libgomp. Unfortunately that means we fail to optimize. [...] > > > [Nathan's trunk r229562] leaves things unchanged for C -- declare a function with an enum arg. > > But for C++ we the extern "C" declaration takes an int -- and therefore > > matches the builtin. We insert an inline wrapper that takes an enum argument. > > Because of C++'s overload resolution both the wrapper and the int-taking > > declaration can have the same source name. > > > --- libgomp/openacc.h (revision 229535) > > +++ libgomp/openacc.h (working copy) > > > -int acc_on_device (acc_device_t) __GOACC_NOTHROW; > > +#ifdef __cplusplus > > +int acc_on_device (int __arg) __GOACC_NOTHROW; > > +#else > > +int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW; > > +#endif > > > #ifdef __cplusplus > > } > > + > > +/* Forwarding function with correctly typed arg. */ > > + > > +inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW > > +{ > > + return acc_on_device ((int) __arg); > > +} > > #endif > > > --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c (revision 0) > > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c (working copy) > > @@ -0,0 +1,12 @@ > > +/* { dg-do compile } */ > > +/* { dg-additional-options "-O2" } */ > > + > > +#include <openacc.h> > > + > > +int Foo (acc_device_t x) > > +{ > > + return acc_on_device (x); > > +} > > + > > +/* { dg-final { scan-assembler-not "acc_on_device" } } */ > > As a user, I'd expect that when compiling such code with "-O0" instead of > "-O2", but adding "__attribute__ ((optimize ("O2")))" to "Foo", that I'd > then get "acc_on_device" expanded as a builtin, and no calls to the > "acc_on_device library function. In C++ that is currently not working, > because the "Forwarding function with correctly typed arg" (cited above) > doesn't "inherit" that "optimize" attribute. Making that one "always > inline" resolves the problem. Also I cleaned up and extended testing > some more. OK for trunk? > > commit 9cc3a384c17e9f692f7864c604d2e2f9fbf0bac9 > Author: Thomas Schwinge <thomas@codesourcery.com> > Date: Tue May 23 13:21:14 2017 +0200 > > Make the OpenACC C++ acc_on_device wrapper "always inline" > > libgomp/ > * openacc.h [__cplusplus] (acc_on_device): Mark as "always > inline". > * testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c: Remove > file; test cases already present... > * testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c: ... in > this file. Update. > * testsuite/libgomp.oacc-c-c++-common/acc-on-device.c: Remove > file; test cases now present... > * testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c: ... in > this new file. > * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Update. > --- > libgomp/openacc.h | 3 +- > .../libgomp.oacc-c-c++-common/acc-on-device-2.c | 22 ------------- > .../libgomp.oacc-c-c++-common/acc-on-device.c | 12 ------- > .../libgomp.oacc-c-c++-common/acc_on_device-1.c | 38 +++++++++++++--------- > .../libgomp.oacc-c-c++-common/acc_on_device-2.c | 21 ++++++++++++ > .../libgomp.oacc-c-c++-common/parallel-dims.c | 14 ++++---- > 6 files changed, 52 insertions(+), 58 deletions(-) > > diff --git libgomp/openacc.h libgomp/openacc.h > index 137e2c1..266f559 100644 > --- libgomp/openacc.h > +++ libgomp/openacc.h > @@ -121,7 +121,8 @@ int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW; > /* Forwarding function with correctly typed arg. */ > > #pragma acc routine seq > -inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW > +inline __attribute__ ((__always_inline__)) int > +acc_on_device (acc_device_t __arg) __GOACC_NOTHROW > { > return acc_on_device ((int) __arg); > } > diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c > deleted file mode 100644 > index bfcb67d..0000000 > --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c > +++ /dev/null > @@ -1,22 +0,0 @@ > -/* Test the acc_on_device library function. */ > -/* { dg-additional-options "-fno-builtin-acc_on_device" } */ > - > -#include <openacc.h> > - > -int main () > -{ > - int dev; > - > -#pragma acc parallel copyout (dev) > - { > - dev = acc_on_device (acc_device_not_host); > - } > - > - int expect = 1; > - > -#if ACC_DEVICE_TYPE_host > - expect = 0; > -#endif > - > - return dev != expect; > -} > diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c > deleted file mode 100644 > index e0d8710..0000000 > --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c > +++ /dev/null > @@ -1,12 +0,0 @@ > -/* { dg-do compile } */ > -/* We don't expect this to work with optimizations disabled. > - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ > - > -#include <openacc.h> > - > -int Foo (acc_device_t x) > -{ > - return acc_on_device (x); > -} > - > -/* { dg-final { scan-assembler-not "acc_on_device" } } */ > diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c > index 8112745..eb962e4 100644 > --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c > @@ -1,6 +1,9 @@ > /* Disable the acc_on_device builtin; we want to test the libgomp library > function. */ > +/* { dg-additional-options "-DACC_ON_DEVICE=acc_on_device" } */ > /* { dg-additional-options "-fno-builtin-acc_on_device" } */ > +/* { dg-additional-options "-fdump-rtl-expand" } > + { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 12 "expand" } } */ > > #include <stdlib.h> > #include <openacc.h> > @@ -11,13 +14,13 @@ main (int argc, char *argv[]) > /* Host. */ > > { > - if (!acc_on_device (acc_device_none)) > + if (!ACC_ON_DEVICE (acc_device_none)) > abort (); > - if (!acc_on_device (acc_device_host)) > + if (!ACC_ON_DEVICE (acc_device_host)) > abort (); > - if (acc_on_device (acc_device_not_host)) > + if (ACC_ON_DEVICE (acc_device_not_host)) > abort (); > - if (acc_on_device (acc_device_nvidia)) > + if (ACC_ON_DEVICE (acc_device_nvidia)) > abort (); > } > > @@ -26,39 +29,44 @@ main (int argc, char *argv[]) > > #pragma acc parallel if(0) > { > - if (!acc_on_device (acc_device_none)) > + if (!ACC_ON_DEVICE (acc_device_none)) > abort (); > - if (!acc_on_device (acc_device_host)) > + if (!ACC_ON_DEVICE (acc_device_host)) > abort (); > - if (acc_on_device (acc_device_not_host)) > + if (ACC_ON_DEVICE (acc_device_not_host)) > abort (); > - if (acc_on_device (acc_device_nvidia)) > + if (ACC_ON_DEVICE (acc_device_nvidia)) > abort (); > } > > > -#if !ACC_DEVICE_TYPE_host > + int on_host_p; > +#if ACC_DEVICE_TYPE_host > + on_host_p = 1; > +#else > + on_host_p = 0; > +#endif > > /* Offloaded. */ > > #pragma acc parallel > { > - if (acc_on_device (acc_device_none)) > + if (on_host_p != ACC_ON_DEVICE (acc_device_none)) > abort (); > - if (acc_on_device (acc_device_host)) > + if (on_host_p != ACC_ON_DEVICE (acc_device_host)) > abort (); > - if (!acc_on_device (acc_device_not_host)) > + if (on_host_p == ACC_ON_DEVICE (acc_device_not_host)) > abort (); > + > #if ACC_DEVICE_TYPE_nvidia > - if (!acc_on_device (acc_device_nvidia)) > + if (!ACC_ON_DEVICE (acc_device_nvidia)) > abort (); > #else > - if (acc_on_device (acc_device_nvidia)) > + if (ACC_ON_DEVICE (acc_device_nvidia)) > abort (); > #endif > } > > -#endif > > return 0; > } > diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c > new file mode 100644 > index 0000000..c3b3378 > --- /dev/null > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c > @@ -0,0 +1,21 @@ > +/* With the acc_on_device builtin enabled, we don't expect any calls to the > + libgomp library function. */ > +/* { dg-additional-options "-fdump-rtl-expand" } > + { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 0 "expand" } } */ > + > +#include <openacc.h> > + > +#ifdef __OPTIMIZE__ > +# define ACC_ON_DEVICE acc_on_device > +#else > +/* Without optimizations enabled, we're not expecting the acc_on_device builtin > + to be used, so use here a "-O2" wrapper. */ > +#pragma acc routine seq > +static int __attribute__ ((optimize ("O2"))) > +ACC_ON_DEVICE (acc_device_t arg) > +{ > + return acc_on_device (arg); > +} > +#endif > + > +#include "acc_on_device-1.c" > diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c > index 8308f7c..1c48ab3 100644 > --- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c > @@ -4,14 +4,12 @@ > #include <limits.h> > #include <openacc.h> > > -/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper > - not behaving as expected for -O0. */ > #pragma acc routine seq > static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () > { > - if (acc_on_device ((int) acc_device_host)) > + if (acc_on_device (acc_device_host)) > return 0; > - else if (acc_on_device ((int) acc_device_nvidia)) > + else if (acc_on_device (acc_device_nvidia)) > { > unsigned int r; > asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r)); > @@ -24,9 +22,9 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () > #pragma acc routine seq > static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () > { > - if (acc_on_device ((int) acc_device_host)) > + if (acc_on_device (acc_device_host)) > return 0; > - else if (acc_on_device ((int) acc_device_nvidia)) > + else if (acc_on_device (acc_device_nvidia)) > { > unsigned int r; > asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r)); > @@ -39,9 +37,9 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () > #pragma acc routine seq > static unsigned int __attribute__ ((optimize ("O2"))) acc_vector () > { > - if (acc_on_device ((int) acc_device_host)) > + if (acc_on_device (acc_device_host)) > return 0; > - else if (acc_on_device ((int) acc_device_nvidia)) > + else if (acc_on_device (acc_device_nvidia)) > { > unsigned int r; > asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r)); Grüße Thomas
Hi! Ping. On Tue, 30 May 2017 14:35:29 +0200, I wrote: > Ping. > > On Tue, 23 May 2017 17:31:11 +0200, I wrote: > > On Thu, 29 Oct 2015 17:22:46 -0700, Nathan Sidwell <nathan@acm.org> wrote: > > > acc_on_device and it's builtin had a conflict. The function formally takes an > > > enum argument, but the builtin takes an int -- primarily to avoid the compiler > > > having to generate the enum type internally. > > > > > > This works fine for C, where the external declaration of the function (in > > > openacc.h) matches up with the builtin, and we optimize the builtin as expected. > > > > > > It fails for C++ where the builtin doesn't match the declaration in the header. > > > We end up with emitting a call to acc_on_device, which is resolved by > > > libgomp. Unfortunately that means we fail to optimize. [...] > > > > > [Nathan's trunk r229562] leaves things unchanged for C -- declare a function with an enum arg. > > > But for C++ we the extern "C" declaration takes an int -- and therefore > > > matches the builtin. We insert an inline wrapper that takes an enum argument. > > > Because of C++'s overload resolution both the wrapper and the int-taking > > > declaration can have the same source name. > > > > > --- libgomp/openacc.h (revision 229535) > > > +++ libgomp/openacc.h (working copy) > > > > > -int acc_on_device (acc_device_t) __GOACC_NOTHROW; > > > +#ifdef __cplusplus > > > +int acc_on_device (int __arg) __GOACC_NOTHROW; > > > +#else > > > +int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW; > > > +#endif > > > > > #ifdef __cplusplus > > > } > > > + > > > +/* Forwarding function with correctly typed arg. */ > > > + > > > +inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW > > > +{ > > > + return acc_on_device ((int) __arg); > > > +} > > > #endif > > > > > --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c (revision 0) > > > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c (working copy) > > > @@ -0,0 +1,12 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-additional-options "-O2" } */ > > > + > > > +#include <openacc.h> > > > + > > > +int Foo (acc_device_t x) > > > +{ > > > + return acc_on_device (x); > > > +} > > > + > > > +/* { dg-final { scan-assembler-not "acc_on_device" } } */ > > > > As a user, I'd expect that when compiling such code with "-O0" instead of > > "-O2", but adding "__attribute__ ((optimize ("O2")))" to "Foo", that I'd > > then get "acc_on_device" expanded as a builtin, and no calls to the > > "acc_on_device library function. In C++ that is currently not working, > > because the "Forwarding function with correctly typed arg" (cited above) > > doesn't "inherit" that "optimize" attribute. Making that one "always > > inline" resolves the problem. Also I cleaned up and extended testing > > some more. OK for trunk? > > > > commit 9cc3a384c17e9f692f7864c604d2e2f9fbf0bac9 > > Author: Thomas Schwinge <thomas@codesourcery.com> > > Date: Tue May 23 13:21:14 2017 +0200 > > > > Make the OpenACC C++ acc_on_device wrapper "always inline" > > > > libgomp/ > > * openacc.h [__cplusplus] (acc_on_device): Mark as "always > > inline". > > * testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c: Remove > > file; test cases already present... > > * testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c: ... in > > this file. Update. > > * testsuite/libgomp.oacc-c-c++-common/acc-on-device.c: Remove > > file; test cases now present... > > * testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c: ... in > > this new file. > > * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Update. > > --- > > libgomp/openacc.h | 3 +- > > .../libgomp.oacc-c-c++-common/acc-on-device-2.c | 22 ------------- > > .../libgomp.oacc-c-c++-common/acc-on-device.c | 12 ------- > > .../libgomp.oacc-c-c++-common/acc_on_device-1.c | 38 +++++++++++++--------- > > .../libgomp.oacc-c-c++-common/acc_on_device-2.c | 21 ++++++++++++ > > .../libgomp.oacc-c-c++-common/parallel-dims.c | 14 ++++---- > > 6 files changed, 52 insertions(+), 58 deletions(-) > > > > diff --git libgomp/openacc.h libgomp/openacc.h > > index 137e2c1..266f559 100644 > > --- libgomp/openacc.h > > +++ libgomp/openacc.h > > @@ -121,7 +121,8 @@ int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW; > > /* Forwarding function with correctly typed arg. */ > > > > #pragma acc routine seq > > -inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW > > +inline __attribute__ ((__always_inline__)) int > > +acc_on_device (acc_device_t __arg) __GOACC_NOTHROW > > { > > return acc_on_device ((int) __arg); > > } > > diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c > > deleted file mode 100644 > > index bfcb67d..0000000 > > --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c > > +++ /dev/null > > @@ -1,22 +0,0 @@ > > -/* Test the acc_on_device library function. */ > > -/* { dg-additional-options "-fno-builtin-acc_on_device" } */ > > - > > -#include <openacc.h> > > - > > -int main () > > -{ > > - int dev; > > - > > -#pragma acc parallel copyout (dev) > > - { > > - dev = acc_on_device (acc_device_not_host); > > - } > > - > > - int expect = 1; > > - > > -#if ACC_DEVICE_TYPE_host > > - expect = 0; > > -#endif > > - > > - return dev != expect; > > -} > > diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c > > deleted file mode 100644 > > index e0d8710..0000000 > > --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c > > +++ /dev/null > > @@ -1,12 +0,0 @@ > > -/* { dg-do compile } */ > > -/* We don't expect this to work with optimizations disabled. > > - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ > > - > > -#include <openacc.h> > > - > > -int Foo (acc_device_t x) > > -{ > > - return acc_on_device (x); > > -} > > - > > -/* { dg-final { scan-assembler-not "acc_on_device" } } */ > > diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c > > index 8112745..eb962e4 100644 > > --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c > > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c > > @@ -1,6 +1,9 @@ > > /* Disable the acc_on_device builtin; we want to test the libgomp library > > function. */ > > +/* { dg-additional-options "-DACC_ON_DEVICE=acc_on_device" } */ > > /* { dg-additional-options "-fno-builtin-acc_on_device" } */ > > +/* { dg-additional-options "-fdump-rtl-expand" } > > + { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 12 "expand" } } */ > > > > #include <stdlib.h> > > #include <openacc.h> > > @@ -11,13 +14,13 @@ main (int argc, char *argv[]) > > /* Host. */ > > > > { > > - if (!acc_on_device (acc_device_none)) > > + if (!ACC_ON_DEVICE (acc_device_none)) > > abort (); > > - if (!acc_on_device (acc_device_host)) > > + if (!ACC_ON_DEVICE (acc_device_host)) > > abort (); > > - if (acc_on_device (acc_device_not_host)) > > + if (ACC_ON_DEVICE (acc_device_not_host)) > > abort (); > > - if (acc_on_device (acc_device_nvidia)) > > + if (ACC_ON_DEVICE (acc_device_nvidia)) > > abort (); > > } > > > > @@ -26,39 +29,44 @@ main (int argc, char *argv[]) > > > > #pragma acc parallel if(0) > > { > > - if (!acc_on_device (acc_device_none)) > > + if (!ACC_ON_DEVICE (acc_device_none)) > > abort (); > > - if (!acc_on_device (acc_device_host)) > > + if (!ACC_ON_DEVICE (acc_device_host)) > > abort (); > > - if (acc_on_device (acc_device_not_host)) > > + if (ACC_ON_DEVICE (acc_device_not_host)) > > abort (); > > - if (acc_on_device (acc_device_nvidia)) > > + if (ACC_ON_DEVICE (acc_device_nvidia)) > > abort (); > > } > > > > > > -#if !ACC_DEVICE_TYPE_host > > + int on_host_p; > > +#if ACC_DEVICE_TYPE_host > > + on_host_p = 1; > > +#else > > + on_host_p = 0; > > +#endif > > > > /* Offloaded. */ > > > > #pragma acc parallel > > { > > - if (acc_on_device (acc_device_none)) > > + if (on_host_p != ACC_ON_DEVICE (acc_device_none)) > > abort (); > > - if (acc_on_device (acc_device_host)) > > + if (on_host_p != ACC_ON_DEVICE (acc_device_host)) > > abort (); > > - if (!acc_on_device (acc_device_not_host)) > > + if (on_host_p == ACC_ON_DEVICE (acc_device_not_host)) > > abort (); > > + > > #if ACC_DEVICE_TYPE_nvidia > > - if (!acc_on_device (acc_device_nvidia)) > > + if (!ACC_ON_DEVICE (acc_device_nvidia)) > > abort (); > > #else > > - if (acc_on_device (acc_device_nvidia)) > > + if (ACC_ON_DEVICE (acc_device_nvidia)) > > abort (); > > #endif > > } > > > > -#endif > > > > return 0; > > } > > diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c > > new file mode 100644 > > index 0000000..c3b3378 > > --- /dev/null > > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c > > @@ -0,0 +1,21 @@ > > +/* With the acc_on_device builtin enabled, we don't expect any calls to the > > + libgomp library function. */ > > +/* { dg-additional-options "-fdump-rtl-expand" } > > + { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 0 "expand" } } */ > > + > > +#include <openacc.h> > > + > > +#ifdef __OPTIMIZE__ > > +# define ACC_ON_DEVICE acc_on_device > > +#else > > +/* Without optimizations enabled, we're not expecting the acc_on_device builtin > > + to be used, so use here a "-O2" wrapper. */ > > +#pragma acc routine seq > > +static int __attribute__ ((optimize ("O2"))) > > +ACC_ON_DEVICE (acc_device_t arg) > > +{ > > + return acc_on_device (arg); > > +} > > +#endif > > + > > +#include "acc_on_device-1.c" > > diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c > > index 8308f7c..1c48ab3 100644 > > --- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c > > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c > > @@ -4,14 +4,12 @@ > > #include <limits.h> > > #include <openacc.h> > > > > -/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper > > - not behaving as expected for -O0. */ > > #pragma acc routine seq > > static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () > > { > > - if (acc_on_device ((int) acc_device_host)) > > + if (acc_on_device (acc_device_host)) > > return 0; > > - else if (acc_on_device ((int) acc_device_nvidia)) > > + else if (acc_on_device (acc_device_nvidia)) > > { > > unsigned int r; > > asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r)); > > @@ -24,9 +22,9 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () > > #pragma acc routine seq > > static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () > > { > > - if (acc_on_device ((int) acc_device_host)) > > + if (acc_on_device (acc_device_host)) > > return 0; > > - else if (acc_on_device ((int) acc_device_nvidia)) > > + else if (acc_on_device (acc_device_nvidia)) > > { > > unsigned int r; > > asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r)); > > @@ -39,9 +37,9 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () > > #pragma acc routine seq > > static unsigned int __attribute__ ((optimize ("O2"))) acc_vector () > > { > > - if (acc_on_device ((int) acc_device_host)) > > + if (acc_on_device (acc_device_host)) > > return 0; > > - else if (acc_on_device ((int) acc_device_nvidia)) > > + else if (acc_on_device (acc_device_nvidia)) > > { > > unsigned int r; > > asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r)); Grüße Thomas
On Tue, Jun 06, 2017 at 08:35:40AM +0200, Thomas Schwinge wrote: > > > commit 9cc3a384c17e9f692f7864c604d2e2f9fbf0bac9 > > > Author: Thomas Schwinge <thomas@codesourcery.com> > > > Date: Tue May 23 13:21:14 2017 +0200 > > > > > > Make the OpenACC C++ acc_on_device wrapper "always inline" > > > > > > libgomp/ > > > * openacc.h [__cplusplus] (acc_on_device): Mark as "always > > > inline". > > > * testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c: Remove > > > file; test cases already present... > > > * testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c: ... in > > > this file. Update. > > > * testsuite/libgomp.oacc-c-c++-common/acc-on-device.c: Remove > > > file; test cases now present... > > > * testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c: ... in > > > this new file. > > > * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Update. I don't like this very much. Can't you instead just turn the builtin into BT_FN_INT_VAR and diagnose during folding if it has no or 2+ arguments or if the argument is not type compatible with int? Jakub
Hi Jakub! On Tue, 6 Jun 2017 08:58:21 +0200, Jakub Jelinek <jakub@redhat.com> wrote: > On Tue, Jun 06, 2017 at 08:35:40AM +0200, Thomas Schwinge wrote: > > > > commit 9cc3a384c17e9f692f7864c604d2e2f9fbf0bac9 > > > > Author: Thomas Schwinge <thomas@codesourcery.com> > > > > Date: Tue May 23 13:21:14 2017 +0200 > > > > > > > > Make the OpenACC C++ acc_on_device wrapper "always inline" > > > > > > > > libgomp/ > > > > * openacc.h [__cplusplus] (acc_on_device): Mark as "always > > > > inline". > > > > * testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c: Remove > > > > file; test cases already present... > > > > * testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c: ... in > > > > this file. Update. > > > > * testsuite/libgomp.oacc-c-c++-common/acc-on-device.c: Remove > > > > file; test cases now present... > > > > * testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c: ... in > > > > this new file. > > > > * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Update. > > I don't like this very much. Thanks for having a look. Would you please clarify whether "this" applies to my "always inline" changes and testing additions that you quoted, or rather to the C++ "acc_on_device" wrapper function as it is currently present? > Can't you instead just turn the builtin into BT_FN_INT_VAR and diagnose > during folding if it has no or 2+ arguments or if the argument is not type > compatible with int? Thanks for the suggestion, I'll look into that! In terms of incremental progress, do you oppose that I commit my existing patch now, and then rework the builtin in a later patch? Grüße Thomas
On Tue, Jun 06, 2017 at 01:16:03PM +0200, Thomas Schwinge wrote: > On Tue, 6 Jun 2017 08:58:21 +0200, Jakub Jelinek <jakub@redhat.com> wrote: > > On Tue, Jun 06, 2017 at 08:35:40AM +0200, Thomas Schwinge wrote: > > > > > commit 9cc3a384c17e9f692f7864c604d2e2f9fbf0bac9 > > > > > Author: Thomas Schwinge <thomas@codesourcery.com> > > > > > Date: Tue May 23 13:21:14 2017 +0200 > > > > > > > > > > Make the OpenACC C++ acc_on_device wrapper "always inline" > > > > > > > > > > libgomp/ > > > > > * openacc.h [__cplusplus] (acc_on_device): Mark as "always > > > > > inline". > > > > > * testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c: Remove > > > > > file; test cases already present... > > > > > * testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c: ... in > > > > > this file. Update. > > > > > * testsuite/libgomp.oacc-c-c++-common/acc-on-device.c: Remove > > > > > file; test cases now present... > > > > > * testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c: ... in > > > > > this new file. > > > > > * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Update. > > > > I don't like this very much. > > Thanks for having a look. Would you please clarify whether "this" > applies to my "always inline" changes and testing additions that you > quoted, or rather to the C++ "acc_on_device" wrapper function as it is > currently present? The C++ acc_on_device wrapper altogether, though of course always inline on it doesn't sound right either (what if you want to take acc_on_device address?). > > Can't you instead just turn the builtin into BT_FN_INT_VAR and diagnose > > during folding if it has no or 2+ arguments or if the argument is not type > > compatible with int? > > Thanks for the suggestion, I'll look into that! > > In terms of incremental progress, do you oppose that I commit my existing > patch now, and then rework the builtin in a later patch? We are in stage1 and this doesn't seem to be a blocker, I think it is better to do it right, no need to do it incrementally. Jakub
diff --git libgomp/openacc.h libgomp/openacc.h index 137e2c1..266f559 100644 --- libgomp/openacc.h +++ libgomp/openacc.h @@ -121,7 +121,8 @@ int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW; /* Forwarding function with correctly typed arg. */ #pragma acc routine seq -inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW +inline __attribute__ ((__always_inline__)) int +acc_on_device (acc_device_t __arg) __GOACC_NOTHROW { return acc_on_device ((int) __arg); } diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c deleted file mode 100644 index bfcb67d..0000000 --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c +++ /dev/null @@ -1,22 +0,0 @@ -/* Test the acc_on_device library function. */ -/* { dg-additional-options "-fno-builtin-acc_on_device" } */ - -#include <openacc.h> - -int main () -{ - int dev; - -#pragma acc parallel copyout (dev) - { - dev = acc_on_device (acc_device_not_host); - } - - int expect = 1; - -#if ACC_DEVICE_TYPE_host - expect = 0; -#endif - - return dev != expect; -} diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c deleted file mode 100644 index e0d8710..0000000 --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c +++ /dev/null @@ -1,12 +0,0 @@ -/* { dg-do compile } */ -/* We don't expect this to work with optimizations disabled. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - -#include <openacc.h> - -int Foo (acc_device_t x) -{ - return acc_on_device (x); -} - -/* { dg-final { scan-assembler-not "acc_on_device" } } */ diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c index 8112745..eb962e4 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c @@ -1,6 +1,9 @@ /* Disable the acc_on_device builtin; we want to test the libgomp library function. */ +/* { dg-additional-options "-DACC_ON_DEVICE=acc_on_device" } */ /* { dg-additional-options "-fno-builtin-acc_on_device" } */ +/* { dg-additional-options "-fdump-rtl-expand" } + { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 12 "expand" } } */ #include <stdlib.h> #include <openacc.h> @@ -11,13 +14,13 @@ main (int argc, char *argv[]) /* Host. */ { - if (!acc_on_device (acc_device_none)) + if (!ACC_ON_DEVICE (acc_device_none)) abort (); - if (!acc_on_device (acc_device_host)) + if (!ACC_ON_DEVICE (acc_device_host)) abort (); - if (acc_on_device (acc_device_not_host)) + if (ACC_ON_DEVICE (acc_device_not_host)) abort (); - if (acc_on_device (acc_device_nvidia)) + if (ACC_ON_DEVICE (acc_device_nvidia)) abort (); } @@ -26,39 +29,44 @@ main (int argc, char *argv[]) #pragma acc parallel if(0) { - if (!acc_on_device (acc_device_none)) + if (!ACC_ON_DEVICE (acc_device_none)) abort (); - if (!acc_on_device (acc_device_host)) + if (!ACC_ON_DEVICE (acc_device_host)) abort (); - if (acc_on_device (acc_device_not_host)) + if (ACC_ON_DEVICE (acc_device_not_host)) abort (); - if (acc_on_device (acc_device_nvidia)) + if (ACC_ON_DEVICE (acc_device_nvidia)) abort (); } -#if !ACC_DEVICE_TYPE_host + int on_host_p; +#if ACC_DEVICE_TYPE_host + on_host_p = 1; +#else + on_host_p = 0; +#endif /* Offloaded. */ #pragma acc parallel { - if (acc_on_device (acc_device_none)) + if (on_host_p != ACC_ON_DEVICE (acc_device_none)) abort (); - if (acc_on_device (acc_device_host)) + if (on_host_p != ACC_ON_DEVICE (acc_device_host)) abort (); - if (!acc_on_device (acc_device_not_host)) + if (on_host_p == ACC_ON_DEVICE (acc_device_not_host)) abort (); + #if ACC_DEVICE_TYPE_nvidia - if (!acc_on_device (acc_device_nvidia)) + if (!ACC_ON_DEVICE (acc_device_nvidia)) abort (); #else - if (acc_on_device (acc_device_nvidia)) + if (ACC_ON_DEVICE (acc_device_nvidia)) abort (); #endif } -#endif return 0; } diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c new file mode 100644 index 0000000..c3b3378 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c @@ -0,0 +1,21 @@ +/* With the acc_on_device builtin enabled, we don't expect any calls to the + libgomp library function. */ +/* { dg-additional-options "-fdump-rtl-expand" } + { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 0 "expand" } } */ + +#include <openacc.h> + +#ifdef __OPTIMIZE__ +# define ACC_ON_DEVICE acc_on_device +#else +/* Without optimizations enabled, we're not expecting the acc_on_device builtin + to be used, so use here a "-O2" wrapper. */ +#pragma acc routine seq +static int __attribute__ ((optimize ("O2"))) +ACC_ON_DEVICE (acc_device_t arg) +{ + return acc_on_device (arg); +} +#endif + +#include "acc_on_device-1.c" diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index 8308f7c..1c48ab3 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -4,14 +4,12 @@ #include <limits.h> #include <openacc.h> -/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper - not behaving as expected for -O0. */ #pragma acc routine seq static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () { - if (acc_on_device ((int) acc_device_host)) + if (acc_on_device (acc_device_host)) return 0; - else if (acc_on_device ((int) acc_device_nvidia)) + else if (acc_on_device (acc_device_nvidia)) { unsigned int r; asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r)); @@ -24,9 +22,9 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () #pragma acc routine seq static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () { - if (acc_on_device ((int) acc_device_host)) + if (acc_on_device (acc_device_host)) return 0; - else if (acc_on_device ((int) acc_device_nvidia)) + else if (acc_on_device (acc_device_nvidia)) { unsigned int r; asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r)); @@ -39,9 +37,9 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () #pragma acc routine seq static unsigned int __attribute__ ((optimize ("O2"))) acc_vector () { - if (acc_on_device ((int) acc_device_host)) + if (acc_on_device (acc_device_host)) return 0; - else if (acc_on_device ((int) acc_device_nvidia)) + else if (acc_on_device (acc_device_nvidia)) { unsigned int r; asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r));