diff mbox

PR65742: OpenACC acc_on_device fixes

Message ID 87zj2yxzq5.fsf@kepler.schwinge.homeip.net
State New
Headers show

Commit Message

Thomas Schwinge July 14, 2015, 8:15 p.m. UTC
Hi!

Per your request in
<https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65742#c8>, »can you please
fix the gcc 5 branch«, I'm planning to apply the following to
gcc-5-branch tomorrow (but wanted to give you a chance to veto, given
that your backport request pre-dates the branch freeze by a week):

commit b73b9881a781f8e5572ce6c6a38f51696fc09b83
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Tue Jul 14 15:27:49 2015 +0200

    OpenACC acc_on_device fixes
    
    Backport trunk r223801:
    
        PR libgomp/65742
    
        gcc/
        * builtins.c (expand_builtin_acc_on_device): Don't use open-coded
        sequence for !ACCEL_COMPILER.
    
        libgomp/
        * oacc-init.c (plugin/plugin-host.h): Include.
        (acc_on_device): Check whether we're in an offloaded region for
        host_nonshm
        plugin. Don't use __builtin_acc_on_device.
        * plugin/plugin-host.c (GOMP_OFFLOAD_openacc_parallel): Set
        nonshm_exec flag in thread-local data.
        (GOMP_OFFLOAD_openacc_create_thread_data): Allocate thread-local
        data for host_nonshm plugin.
        (GOMP_OFFLOAD_openacc_destroy_thread_data): Free thread-local data
        for host_nonshm plugin.
        * plugin/plugin-host.h: New.
    
    Mark parameters with ATTRIBUTE_UNUSED
    
    Backport trunk r223805:
    
    	* builtins.c (expand_builtin_acc_on_device): Mark parameters
    	with ATTRIBUTE_UNUSED.
    
    [PR libgomp/65742, PR middle-end/66332] XFAIL acc_on_device compile-time evaluation
    
    The OpenACC 2.0a specification mandates differently, but we currently do get a
    library call in the host code.
    
    Backport trunk r224028:
    
    	PR libgomp/65742
    	PR middle-end/66332
    
    	gcc/testsuite/
    	* c-c++-common/goacc/acc_on_device-2.c: XFAIL for C, too.
---
 gcc/builtins.c                                     |   12 +++----
 gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c |   10 +++++-
 libgomp/oacc-init.c                                |   14 ++++++--
 libgomp/plugin/plugin-host.c                       |   21 +++++++++--
 libgomp/plugin/plugin-host.h                       |   37 ++++++++++++++++++++
 8 files changed, 127 insertions(+), 12 deletions(-)



Grüße,
 Thomas

Comments

Richard Biener July 15, 2015, 6:10 a.m. UTC | #1
On July 14, 2015 10:15:30 PM GMT+02:00, Thomas Schwinge <thomas@codesourcery.com> wrote:
>Hi!
>
>Per your request in
><https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65742#c8>, »can you
>please
>fix the gcc 5 branch«, I'm planning to apply the following to
>gcc-5-branch tomorrow (but wanted to give you a chance to veto, given
>that your backport request pre-dates the branch freeze by a week):

OK

Richard

>commit b73b9881a781f8e5572ce6c6a38f51696fc09b83
>Author: Thomas Schwinge <thomas@codesourcery.com>
>Date:   Tue Jul 14 15:27:49 2015 +0200
>
>    OpenACC acc_on_device fixes
>    
>    Backport trunk r223801:
>    
>        PR libgomp/65742
>    
>        gcc/
>      * builtins.c (expand_builtin_acc_on_device): Don't use open-coded
>        sequence for !ACCEL_COMPILER.
>    
>        libgomp/
>        * oacc-init.c (plugin/plugin-host.h): Include.
>        (acc_on_device): Check whether we're in an offloaded region for
>        host_nonshm
>        plugin. Don't use __builtin_acc_on_device.
>        * plugin/plugin-host.c (GOMP_OFFLOAD_openacc_parallel): Set
>        nonshm_exec flag in thread-local data.
>       (GOMP_OFFLOAD_openacc_create_thread_data): Allocate thread-local
>        data for host_nonshm plugin.
>     (GOMP_OFFLOAD_openacc_destroy_thread_data): Free thread-local data
>        for host_nonshm plugin.
>        * plugin/plugin-host.h: New.
>    
>    Mark parameters with ATTRIBUTE_UNUSED
>    
>    Backport trunk r223805:
>    
>    	* builtins.c (expand_builtin_acc_on_device): Mark parameters
>    	with ATTRIBUTE_UNUSED.
>    
>[PR libgomp/65742, PR middle-end/66332] XFAIL acc_on_device
>compile-time evaluation
>    
>The OpenACC 2.0a specification mandates differently, but we currently
>do get a
>    library call in the host code.
>    
>    Backport trunk r224028:
>    
>    	PR libgomp/65742
>    	PR middle-end/66332
>    
>    	gcc/testsuite/
>    	* c-c++-common/goacc/acc_on_device-2.c: XFAIL for C, too.
>---
> gcc/builtins.c                                     |   12 +++----
> gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c |   10 +++++-
> libgomp/oacc-init.c                                |   14 ++++++--
> libgomp/plugin/plugin-host.c                       |   21 +++++++++--
>libgomp/plugin/plugin-host.h                       |   37
>++++++++++++++++++++
> 8 files changed, 127 insertions(+), 12 deletions(-)
>
>diff --git gcc/builtins.c gcc/builtins.c
>index 9263777..bcbc11d 100644
>--- gcc/builtins.c
>+++ gcc/builtins.c
>@@ -5915,8 +5915,10 @@ expand_stack_save (void)
>    acceleration device (ACCEL_COMPILER conditional).  */
> 
> static rtx
>-expand_builtin_acc_on_device (tree exp, rtx target)
>+expand_builtin_acc_on_device (tree exp ATTRIBUTE_UNUSED,
>+			      rtx target ATTRIBUTE_UNUSED)
> {
>+#ifdef ACCEL_COMPILER
>   if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
>     return NULL_RTX;
> 
>@@ -5925,13 +5927,8 @@ expand_builtin_acc_on_device (tree exp, rtx
>target)
>   /* Return (arg == v1 || arg == v2) ? 1 : 0.  */
>   machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
>   rtx v = expand_normal (arg), v1, v2;
>-#ifdef ACCEL_COMPILER
>   v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
>   v2 = GEN_INT (ACCEL_COMPILER_acc_device);
>-#else
>-  v1 = GEN_INT (GOMP_DEVICE_NONE);
>-  v2 = GEN_INT (GOMP_DEVICE_HOST);
>-#endif
>   machine_mode target_mode = TYPE_MODE (integer_type_node);
>   if (!target || !register_operand (target, target_mode))
>     target = gen_reg_rtx (target_mode);
>@@ -5945,6 +5942,9 @@ expand_builtin_acc_on_device (tree exp, rtx
>target)
>   emit_label (done_label);
> 
>   return target;
>+#else
>+  return NULL;
>+#endif
> }
> 
> 
>diff --git gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
>gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
>index 2f4ee2b..7fe4e4e 100644
>--- gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
>+++ gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
>@@ -20,10 +20,18 @@ f (void)
> }
> 
>/* With -fopenacc, we're expecting the builtin to be expanded, so no
>calls.
>+
>  TODO: in C++, even under extern "C", the use of enum for acc_device_t
>perturbs expansion as a builtin, which expects an int parameter.  It's
>fine
>when changing acc_device_t to plain int, but that's not what we're
>doing in
>    <openacc.h>.
>-   { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device"
>0 "expand" { xfail c++ } } } */
>+
>+   TODO: given that we can't expand acc_on_device in
>+   gcc/builtins.c:expand_builtin_acc_on_device for in the
>!ACCEL_COMPILER case
>+   (because at that point we don't know whether we're acc_device_host
>or
>+   acc_device_host_nonshm), we'll (erroneously) get a library call in
>the host
>+   code.
>+
>+   { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device"
>0 "expand" { xfail { c || c++ } } } } */
> 
> /* { dg-final { cleanup-rtl-dump "expand" } } */
>diff --git libgomp/oacc-init.c libgomp/oacc-init.c
>index dc40fb6..a7c2e0d 100644
>--- libgomp/oacc-init.c
>+++ libgomp/oacc-init.c
>@@ -29,6 +29,7 @@
> #include "libgomp.h"
> #include "oacc-int.h"
> #include "openacc.h"
>+#include "plugin/plugin-host.h"
> #include <assert.h>
> #include <stdlib.h>
> #include <strings.h>
>@@ -548,11 +549,18 @@ ialias (acc_set_device_num)
> int
> acc_on_device (acc_device_t dev)
> {
>-  if (acc_get_device_type () == acc_device_host_nonshm)
>+  struct goacc_thread *thr = goacc_thread ();
>+
>+  /* We only want to appear to be the "host_nonshm" plugin from
>"offloaded"
>+     code -- i.e. within a parallel region.  Test a flag set by the
>+     openacc_parallel hook of the host_nonshm plugin to determine
>that.  */
>+  if (acc_get_device_type () == acc_device_host_nonshm
>+      && thr && thr->target_tls
>+      && ((struct nonshm_thread *)thr->target_tls)->nonshm_exec)
>    return dev == acc_device_host_nonshm || dev == acc_device_not_host;
> 
>-  /* Just rely on the compiler builtin.  */
>-  return __builtin_acc_on_device (dev);
>+  /* For OpenACC, libgomp is only built for the host, so this is
>sufficient.  */
>+  return dev == acc_device_host || dev == acc_device_none;
> }
> 
> ialias (acc_on_device)
>diff --git libgomp/plugin/plugin-host.c libgomp/plugin/plugin-host.c
>index 1faf5bc..3cb4dab 100644
>--- libgomp/plugin/plugin-host.c
>+++ libgomp/plugin/plugin-host.c
>@@ -44,6 +44,7 @@
> #include <stdlib.h>
> #include <string.h>
> #include <stdio.h>
>+#include <stdbool.h>
> 
> #ifdef HOST_NONSHM_PLUGIN
> #define STATIC
>@@ -55,6 +56,10 @@
> #define SELF "host: "
> #endif
> 
>+#ifdef HOST_NONSHM_PLUGIN
>+#include "plugin-host.h"
>+#endif
>+
> STATIC const char *
> GOMP_OFFLOAD_get_name (void)
> {
>@@ -174,7 +179,10 @@ GOMP_OFFLOAD_openacc_parallel (void (*fn) (void
>*),
> 			       void *targ_mem_desc __attribute__ ((unused)))
> {
> #ifdef HOST_NONSHM_PLUGIN
>+  struct nonshm_thread *thd = GOMP_PLUGIN_acc_thread ();
>+  thd->nonshm_exec = true;
>   fn (devaddrs);
>+  thd->nonshm_exec = false;
> #else
>   fn (hostaddrs);
> #endif
>@@ -232,11 +240,20 @@ STATIC void *
> GOMP_OFFLOAD_openacc_create_thread_data (int ord
> 					 __attribute__ ((unused)))
> {
>+#ifdef HOST_NONSHM_PLUGIN
>+  struct nonshm_thread *thd
>+    = GOMP_PLUGIN_malloc (sizeof (struct nonshm_thread));
>+  thd->nonshm_exec = false;
>+  return thd;
>+#else
>   return NULL;
>+#endif
> }
> 
> STATIC void
>-GOMP_OFFLOAD_openacc_destroy_thread_data (void *tls_data
>-					  __attribute__ ((unused)))
>+GOMP_OFFLOAD_openacc_destroy_thread_data (void *tls_data)
> {
>+#ifdef HOST_NONSHM_PLUGIN
>+  free (tls_data);
>+#endif
> }
>diff --git libgomp/plugin/plugin-host.h libgomp/plugin/plugin-host.h
>new file mode 100644
>index 0000000..96955d1
>--- /dev/null
>+++ libgomp/plugin/plugin-host.h
>@@ -0,0 +1,37 @@
>+/* OpenACC Runtime Library: acc_device_host, acc_device_host_nonshm.
>+
>+   Copyright (C) 2015 Free Software Foundation, Inc.
>+
>+   Contributed by Mentor Embedded.
>+
>+   This file is part of the GNU Offloading and Multi Processing
>Library
>+   (libgomp).
>+
>+   Libgomp is free software; you can redistribute it and/or modify it
>+   under the terms of the GNU General Public License as published by
>+   the Free Software Foundation; either version 3, or (at your option)
>+   any later version.
>+
>+   Libgomp is distributed in the hope that it will be useful, but
>WITHOUT ANY
>+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
>FITNESS
>+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>+   more details.
>+
>+   Under Section 7 of GPL version 3, you are granted additional
>+   permissions described in the GCC Runtime Library Exception, version
>+   3.1, as published by the Free Software Foundation.
>+
>+   You should have received a copy of the GNU General Public License
>and
>+   a copy of the GCC Runtime Library Exception along with this
>program;
>+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not,
>see
>+   <http://www.gnu.org/licenses/>.  */
>+
>+#ifndef PLUGIN_HOST_H
>+#define PLUGIN_HOST_H
>+
>+struct nonshm_thread
>+{
>+  bool nonshm_exec;
>+};
>+
>+#endif
>
>
>Grüße,
> Thomas
diff mbox

Patch

diff --git gcc/builtins.c gcc/builtins.c
index 9263777..bcbc11d 100644
--- gcc/builtins.c
+++ gcc/builtins.c
@@ -5915,8 +5915,10 @@  expand_stack_save (void)
    acceleration device (ACCEL_COMPILER conditional).  */
 
 static rtx
-expand_builtin_acc_on_device (tree exp, rtx target)
+expand_builtin_acc_on_device (tree exp ATTRIBUTE_UNUSED,
+			      rtx target ATTRIBUTE_UNUSED)
 {
+#ifdef ACCEL_COMPILER
   if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
     return NULL_RTX;
 
@@ -5925,13 +5927,8 @@  expand_builtin_acc_on_device (tree exp, rtx target)
   /* Return (arg == v1 || arg == v2) ? 1 : 0.  */
   machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
   rtx v = expand_normal (arg), v1, v2;
-#ifdef ACCEL_COMPILER
   v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
   v2 = GEN_INT (ACCEL_COMPILER_acc_device);
-#else
-  v1 = GEN_INT (GOMP_DEVICE_NONE);
-  v2 = GEN_INT (GOMP_DEVICE_HOST);
-#endif
   machine_mode target_mode = TYPE_MODE (integer_type_node);
   if (!target || !register_operand (target, target_mode))
     target = gen_reg_rtx (target_mode);
@@ -5945,6 +5942,9 @@  expand_builtin_acc_on_device (tree exp, rtx target)
   emit_label (done_label);
 
   return target;
+#else
+  return NULL;
+#endif
 }
 
 
diff --git gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
index 2f4ee2b..7fe4e4e 100644
--- gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
+++ gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
@@ -20,10 +20,18 @@  f (void)
 }
 
 /* With -fopenacc, we're expecting the builtin to be expanded, so no calls.
+
    TODO: in C++, even under extern "C", the use of enum for acc_device_t
    perturbs expansion as a builtin, which expects an int parameter.  It's fine
    when changing acc_device_t to plain int, but that's not what we're doing in
    <openacc.h>.
-   { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 0 "expand" { xfail c++ } } } */
+
+   TODO: given that we can't expand acc_on_device in
+   gcc/builtins.c:expand_builtin_acc_on_device for in the !ACCEL_COMPILER case
+   (because at that point we don't know whether we're acc_device_host or
+   acc_device_host_nonshm), we'll (erroneously) get a library call in the host
+   code.
+
+   { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 0 "expand" { xfail { c || c++ } } } } */
 
 /* { dg-final { cleanup-rtl-dump "expand" } } */
diff --git libgomp/oacc-init.c libgomp/oacc-init.c
index dc40fb6..a7c2e0d 100644
--- libgomp/oacc-init.c
+++ libgomp/oacc-init.c
@@ -29,6 +29,7 @@ 
 #include "libgomp.h"
 #include "oacc-int.h"
 #include "openacc.h"
+#include "plugin/plugin-host.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <strings.h>
@@ -548,11 +549,18 @@  ialias (acc_set_device_num)
 int
 acc_on_device (acc_device_t dev)
 {
-  if (acc_get_device_type () == acc_device_host_nonshm)
+  struct goacc_thread *thr = goacc_thread ();
+
+  /* We only want to appear to be the "host_nonshm" plugin from "offloaded"
+     code -- i.e. within a parallel region.  Test a flag set by the
+     openacc_parallel hook of the host_nonshm plugin to determine that.  */
+  if (acc_get_device_type () == acc_device_host_nonshm
+      && thr && thr->target_tls
+      && ((struct nonshm_thread *)thr->target_tls)->nonshm_exec)
     return dev == acc_device_host_nonshm || dev == acc_device_not_host;
 
-  /* Just rely on the compiler builtin.  */
-  return __builtin_acc_on_device (dev);
+  /* For OpenACC, libgomp is only built for the host, so this is sufficient.  */
+  return dev == acc_device_host || dev == acc_device_none;
 }
 
 ialias (acc_on_device)
diff --git libgomp/plugin/plugin-host.c libgomp/plugin/plugin-host.c
index 1faf5bc..3cb4dab 100644
--- libgomp/plugin/plugin-host.c
+++ libgomp/plugin/plugin-host.c
@@ -44,6 +44,7 @@ 
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
+#include <stdbool.h>
 
 #ifdef HOST_NONSHM_PLUGIN
 #define STATIC
@@ -55,6 +56,10 @@ 
 #define SELF "host: "
 #endif
 
+#ifdef HOST_NONSHM_PLUGIN
+#include "plugin-host.h"
+#endif
+
 STATIC const char *
 GOMP_OFFLOAD_get_name (void)
 {
@@ -174,7 +179,10 @@  GOMP_OFFLOAD_openacc_parallel (void (*fn) (void *),
 			       void *targ_mem_desc __attribute__ ((unused)))
 {
 #ifdef HOST_NONSHM_PLUGIN
+  struct nonshm_thread *thd = GOMP_PLUGIN_acc_thread ();
+  thd->nonshm_exec = true;
   fn (devaddrs);
+  thd->nonshm_exec = false;
 #else
   fn (hostaddrs);
 #endif
@@ -232,11 +240,20 @@  STATIC void *
 GOMP_OFFLOAD_openacc_create_thread_data (int ord
 					 __attribute__ ((unused)))
 {
+#ifdef HOST_NONSHM_PLUGIN
+  struct nonshm_thread *thd
+    = GOMP_PLUGIN_malloc (sizeof (struct nonshm_thread));
+  thd->nonshm_exec = false;
+  return thd;
+#else
   return NULL;
+#endif
 }
 
 STATIC void
-GOMP_OFFLOAD_openacc_destroy_thread_data (void *tls_data
-					  __attribute__ ((unused)))
+GOMP_OFFLOAD_openacc_destroy_thread_data (void *tls_data)
 {
+#ifdef HOST_NONSHM_PLUGIN
+  free (tls_data);
+#endif
 }
diff --git libgomp/plugin/plugin-host.h libgomp/plugin/plugin-host.h
new file mode 100644
index 0000000..96955d1
--- /dev/null
+++ libgomp/plugin/plugin-host.h
@@ -0,0 +1,37 @@ 
+/* OpenACC Runtime Library: acc_device_host, acc_device_host_nonshm.
+
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef PLUGIN_HOST_H
+#define PLUGIN_HOST_H
+
+struct nonshm_thread
+{
+  bool nonshm_exec;
+};
+
+#endif