diff mbox series

libgomp: Fix declare target link with offset array-section mapping [PR116107]

Message ID 9b132853-8fce-4433-9683-c2cba31367c2@baylibre.com
State New
Headers show
Series libgomp: Fix declare target link with offset array-section mapping [PR116107] | expand

Commit Message

Tobias Burnus July 26, 2024, 6:05 p.m. UTC
The main idea of 'link' is to permit putting only a subset of a
huge array on the device. Well, in order to make this work properly,
it requires that one can map an array section, which does not
start with the first element.

This patch adjusts the pointers such, that this actually works.

(Tested on x86-64-gnu-linux with Nvptx offloading.)
Comments, suggestions, remarks before I commit it?

Tobias

Comments

Jakub Jelinek July 29, 2024, 8:18 a.m. UTC | #1
On Fri, Jul 26, 2024 at 08:05:43PM +0200, Tobias Burnus wrote:
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -1820,8 +1820,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
>  		if (k->aux && k->aux->link_key)
>  		  {
>  		    /* Set link pointer on target to the device address of the
> -		       mapped object.  */
> -		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
> +		       mapped object. Also deal with offsets due to
> +		       array-section mapping. */

Formatting.  Two spaces after . in both spots.

> +		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset
> +					       - (k->host_start
> +						  - k->aux->link_key->host_start));

Otherwise LGTM.

	Jakub
Thomas Schwinge Aug. 7, 2024, 8:46 a.m. UTC | #2
Hi Tobias!

On 2024-07-26T20:05:43+0200, Tobias Burnus <tburnus@baylibre.com> wrote:
> The main idea of 'link' is to permit putting only a subset of a
> huge array on the device. Well, in order to make this work properly,
> it requires that one can map an array section, which does not
> start with the first element.
>
> This patch adjusts the pointers such, that this actually works.
>
> (Tested on x86-64-gnu-linux with Nvptx offloading.)
> Comments, suggestions, remarks before I commit it?

> libgomp: Fix declare target link with offset array-section mapping [PR116107]
>
> Assume that 'int var[100]' is 'omp declare target link(var)'. When now
> mapping an array section with offset such as 'map(to:var[20:10])',
> the device-side link pointer has to store &<device-storage-data>[0] minus
> the offset such that var[20] will access <device-storage-data>[0]. But
> the offset calculation was missed such that the device-side 'var' pointed
> to the first element of the mapped data - and var[20] points beyond at
> some invalid memory.
>
> 	PR middle-end/116107
>
> libgomp/ChangeLog:
>
> 	* target.c (gomp_map_vars_internal): Honor array mapping offsets
> 	with declare-target 'link' variables.
> 	* testsuite/libgomp.c-c++-common/target-link-2.c: New test.
>
>  libgomp/target.c                                   |  7 ++-
>  .../testsuite/libgomp.c-c++-common/target-link-2.c | 59 ++++++++++++++++++++++
>  2 files changed, 64 insertions(+), 2 deletions(-)

The new test case 'libgomp.c-c++-common/target-link-2.c' generally PASSes
on one-GPU systems, but on a multi-GPU system (tested nvidia5):

    $ nvidia-smi -L
    GPU 0: Tesla K80 (UUID: [...])
    GPU 1: Tesla K80 (UUID: [...])

..., I see:

    +PASS: libgomp.c/../libgomp.c-c++-common/target-link-2.c (test for excess errors)
    +FAIL: libgomp.c/../libgomp.c-c++-common/target-link-2.c execution test

    +PASS: libgomp.c++/../libgomp.c-c++-common/target-link-2.c (test for excess errors)
    +FAIL: libgomp.c++/../libgomp.c-c++-common/target-link-2.c execution test

    [...]
    #2  0x00007ffff7b548fc in __GI_abort () at abort.c:79
    #3  0x0000000010000bd4 in main () at [...]/libgomp.c-c++-common/target-link-2.c:38
    (gdb) frame 3
    #3  0x0000000010000bd4 in main () at [...]/libgomp.c-c++-common/target-link-2.c:38
    38              __builtin_abort ();
    (gdb) list
    33
    34            #pragma omp target map(from: res2) device(dev)
    35              res2 = arr[5];
    36
    37            if (res2 != 6)
    38              __builtin_abort ();
    [...]
    (gdb) print res2
    $1 = 60

I first thought that maybe just:

    --- libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
    +++ libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
    @@ -54,6 +54,8 @@ int main()
           for (int i = 0; i < 10; i++)
            if (res[i] != (4 + i)*10)
              __builtin_abort ();
    +
    +      #pragma omp target exit data map(release:arr[3:10]) device(dev)
         }
       return 0;
     }

... was missing, but that doesn't resolve the issue: same error state.
Could you please have a look what other state needs to be reset, in which
way?


Grüße
 Thomas


> diff --git a/libgomp/target.c b/libgomp/target.c
> index aa01c1367b9..e3e648f5443 100644
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -1820,8 +1820,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
>  		if (k->aux && k->aux->link_key)
>  		  {
>  		    /* Set link pointer on target to the device address of the
> -		       mapped object.  */
> -		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
> +		       mapped object. Also deal with offsets due to
> +		       array-section mapping. */
> +		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset
> +					       - (k->host_start
> +						  - k->aux->link_key->host_start));
>  		    /* We intentionally do not use coalescing here, as it's not
>  		       data allocated by the current call to this function.  */
>  		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
> diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
> new file mode 100644
> index 00000000000..4ff4080da76
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
> @@ -0,0 +1,59 @@
> +/* PR middle-end/116107  */
> +
> +#include <omp.h>
> +
> +int arr[15] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
> +#pragma omp declare target link(arr)
> +
> +#pragma omp begin declare target
> +void f(int *res)
> +{
> +  __builtin_memcpy (res, &arr[5], sizeof(int)*10);
> +}
> +
> +void g(int *res)
> +{
> +  __builtin_memcpy (res, &arr[3], sizeof(int)*10);
> +}
> +#pragma omp end declare target
> +
> +int main()
> +{
> +  int res[10], res2;
> +  for (int dev = 0; dev < omp_get_num_devices(); dev++)
> +    {
> +      __builtin_memset (res, 0, sizeof (res));
> +      res2 = 99;
> +
> +      #pragma omp target enter data map(arr[5:10]) device(dev)
> +
> +      #pragma omp target map(from: res) device(dev)
> +	f (res);
> +
> +      #pragma omp target map(from: res2) device(dev)
> +	res2 = arr[5];
> +
> +      if (res2 != 6)
> +	__builtin_abort ();
> +      for (int i = 0; i < 10; i++)
> +	if (res[i] != 6 + i)
> +	  __builtin_abort ();
> +
> +      #pragma omp target exit data map(release:arr[5:10]) device(dev)
> +
> +      for (int i = 0; i < 15; i++)
> +	res[i] *= 10;
> +	  __builtin_abort ();
> +
> +      #pragma omp target enter data map(arr[3:10]) device(dev)
> +      __builtin_memset (res, 0, sizeof (res));
> +
> +      #pragma omp target map(from: res) device(dev)
> +	g (res);
> +
> +      for (int i = 0; i < 10; i++)
> +	if (res[i] != (4 + i)*10)
> +	  __builtin_abort ();
> +    }
> +  return 0;
> +}
diff mbox series

Patch

libgomp: Fix declare target link with offset array-section mapping [PR116107]

Assume that 'int var[100]' is 'omp declare target link(var)'. When now
mapping an array section with offset such as 'map(to:var[20:10])',
the device-side link pointer has to store &<device-storage-data>[0] minus
the offset such that var[20] will access <device-storage-data>[0]. But
the offset calculation was missed such that the device-side 'var' pointed
to the first element of the mapped data - and var[20] points beyond at
some invalid memory.

	PR middle-end/116107

libgomp/ChangeLog:

	* target.c (gomp_map_vars_internal): Honor array mapping offsets
	with declare-target 'link' variables.
	* testsuite/libgomp.c-c++-common/target-link-2.c: New test.

 libgomp/target.c                                   |  7 ++-
 .../testsuite/libgomp.c-c++-common/target-link-2.c | 59 ++++++++++++++++++++++
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/libgomp/target.c b/libgomp/target.c
index aa01c1367b9..e3e648f5443 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1820,8 +1820,11 @@  gomp_map_vars_internal (struct gomp_device_descr *devicep,
 		if (k->aux && k->aux->link_key)
 		  {
 		    /* Set link pointer on target to the device address of the
-		       mapped object.  */
-		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
+		       mapped object. Also deal with offsets due to
+		       array-section mapping. */
+		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset
+					       - (k->host_start
+						  - k->aux->link_key->host_start));
 		    /* We intentionally do not use coalescing here, as it's not
 		       data allocated by the current call to this function.  */
 		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
new file mode 100644
index 00000000000..4ff4080da76
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
@@ -0,0 +1,59 @@ 
+/* PR middle-end/116107  */
+
+#include <omp.h>
+
+int arr[15] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+#pragma omp declare target link(arr)
+
+#pragma omp begin declare target
+void f(int *res)
+{
+  __builtin_memcpy (res, &arr[5], sizeof(int)*10);
+}
+
+void g(int *res)
+{
+  __builtin_memcpy (res, &arr[3], sizeof(int)*10);
+}
+#pragma omp end declare target
+
+int main()
+{
+  int res[10], res2;
+  for (int dev = 0; dev < omp_get_num_devices(); dev++)
+    {
+      __builtin_memset (res, 0, sizeof (res));
+      res2 = 99;
+
+      #pragma omp target enter data map(arr[5:10]) device(dev)
+
+      #pragma omp target map(from: res) device(dev)
+	f (res);
+
+      #pragma omp target map(from: res2) device(dev)
+	res2 = arr[5];
+
+      if (res2 != 6)
+	__builtin_abort ();
+      for (int i = 0; i < 10; i++)
+	if (res[i] != 6 + i)
+	  __builtin_abort ();
+
+      #pragma omp target exit data map(release:arr[5:10]) device(dev)
+
+      for (int i = 0; i < 15; i++)
+	res[i] *= 10;
+	  __builtin_abort ();
+
+      #pragma omp target enter data map(arr[3:10]) device(dev)
+      __builtin_memset (res, 0, sizeof (res));
+
+      #pragma omp target map(from: res) device(dev)
+	g (res);
+
+      for (int i = 0; i < 10; i++)
+	if (res[i] != (4 + i)*10)
+	  __builtin_abort ();
+    }
+  return 0;
+}