diff mbox series

Re-unify 'omp_build_component_ref' and 'oacc_build_component_ref'

Message ID 87r1f2puss.fsf@euler.schwinge.homeip.net
State New
Headers show
Series Re-unify 'omp_build_component_ref' and 'oacc_build_component_ref' | expand

Commit Message

Thomas Schwinge Aug. 9, 2021, 2:16 p.m. UTC
[from internal]


Hi!

This concerns a class of ICEs seen as of og10 branch with the
"openacc: Middle-end worker-partitioning support" and "amdgcn:
Enable OpenACC worker partitioning for AMD GCN" changes applied:

On 2020-06-06T16:07:36+0100, Kwok Cheung Yeung <kwok_yeung@mentor.com> wrote:
> On 01/06/2020 8:48 pm, Kwok Cheung Yeung wrote:
>> On 21/05/2020 10:23 pm, Kwok Cheung Yeung wrote:
>>> These all have the same failure mode:
>>>
>>> during RTL pass: expand
>>> [...]/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90: In function 'MAIN__._omp_fn.1':
>>> [...]/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90:86: internal compiler error: in convert_memory_address_addr_space_1, at explow.c:302
>>> 0xc29f20 convert_memory_address_addr_space_1(scalar_int_mode, rtx_def*, unsigned char, bool, bool)
>>>          [...]/gcc/explow.c:302
>>> 0xc29f57 convert_memory_address_addr_space(scalar_int_mode, rtx_def*, unsigned char)
>>>          [...]/gcc/explow.c:404
>>> [...]

>>> This occurs if the -ftree-slp-vectorize flag is specified (default at -O3).

>> The problematic bit of Gimple code is this:
>>
>>    .oacc_worker_o.44._120 = gangs_min_472;
>>    .oacc_worker_o.44._122 = workers_min_473;
>>    .oacc_worker_o.44._124 = vectors_min_474;
>>    .oacc_worker_o.44._126 = gangs_max_475;
>>    .oacc_worker_o.44._128 = workers_max_476;
>>    .oacc_worker_o.44._130 = vectors_max_477;
>>    .oacc_worker_o.44._132 = 0;
>>
>> With SLP vectorization enabled, it becomes this:
>>
>>    _40 = {gangs_min_472, workers_min_473, vectors_min_474, gangs_max_475};
>>    ...
>>    MEM <vector(4) int> [(int *)&.oacc_worker_o.44] = _40;
>>    .oacc_worker_o.44._128 = workers_max_476;
>>    .oacc_worker_o.44._130 = vectors_max_477;
>>    .oacc_worker_o.44._132 = 0;
>>
>> The optimization is trying to transform 4 separate assignments into a single
>> memory operation. The trouble is that &o.acc_worker_o is an SImode pointer in
>> AS4 (LDS), while the memory expression appears to be in the default memory
>> space. The 'to' expression of the assignment is:
>>
>>   <mem_ref 0x7ffff74c61e0
>>      type <vector_type 0x7ffff7470498
>>          type <integer_type 0x7ffff73195e8 int public SI
>>              size <integer_cst 0x7ffff7318bb8 constant 32>
>>              unit-size <integer_cst 0x7ffff7318bd0 constant 4>
>>              align:32 warn_if_not_align:0 symtab:0 alias-set 1 canonical-type 0x7ffff73195e8 precision:32 min <integer_cst 0x7ffff7318b70 -2147483648> max <integer_cst 0x7ffff7318b88 2147483647>
>>              pointer_to_this <pointer_type 0x7ffff73209d8> reference_to_this <reference_type 0x7ffff73d9d20>>
>>          TI
>>          size <integer_cst 0x7ffff7318ca8 constant 128>
>>          unit-size <integer_cst 0x7ffff7318cc0 constant 16>
>>          align:128 warn_if_not_align:0 symtab:0 alias-set 1 structural-equality nunits:4
>>          pointer_to_this <pointer_type 0x7ffff7470540>>
>>
>>      arg:0 <addr_expr 0x7ffff74cdb80
>>          type <pointer_type 0x7ffff73209d8 type <integer_type 0x7ffff73195e8 int>
>>              public unsigned DI
>>              size <integer_cst 0x7ffff7318978 constant 64>
>>              unit-size <integer_cst 0x7ffff7318990 constant 8>
>>              align:64 warn_if_not_align:0 symtab:0 alias-set 2 structural-equality>
>>          constant
>>          arg:0 <var_decl 0x7ffff7477f30 .oacc_worker_o.44 type <record_type 0x7ffff73eb888 .oacc_ws_data_s.21 address-space-4>
>>              addressable used static ignored BLK [...]/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90:86:0
>>
>>              size <integer_cst 0x7ffff746ce70 constant 224>
>>              unit-size <integer_cst 0x7ffff746ce40 constant 28>
>>              align:128 warn_if_not_align:0
>>              (mem/c:BLK (symbol_ref:SI (".oacc_worker_o.44.14") [flags 0x2] <var_decl 0x7ffff7477f30 .oacc_worker_o.44>) [9 .oacc_worker_o.44+0 S28 A128 AS4])>>
>>      arg:1 <integer_cst 0x7ffff73ff078 type <pointer_type 0x7ffff73209d8> constant 0>>
>>
>> In convert_memory_address_addr_space_1:
>>
>> #ifndef POINTERS_EXTEND_UNSIGNED
>>    gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode);
>>    return x;
>> #else /* defined(POINTERS_EXTEND_UNSIGNED) */
>>
>> POINTERS_EXTEND_UNSIGNED is not defined, so it hits the assert. The expected
>> to_mode is DI_mode, but x is SI_mode, so the assert fires.

> I now have a fix for this.
>
>  >    MEM <vector(4) int> [(int *)&.oacc_worker_o.44] = _40;
>
> The ICE occurs because the SLP vectorization pass creates the new statement
> using the type of the expression '&.oacc_worker_o.44', which is a pointer to a
> component ref in the default address space. The expand pass gets confused
> because it is handed an SImode pointer (for LDS) when it is expecting a DImode
> pointer (for flat/global space).
>
> The underlying problem is that although .oacc_worker_o is in the correct address
> space, the component ref .oacc_worker_o is not. I fixed this by propagating the
> address space of .oacc_worker_o when the component ref is created.

>  static tree
>  oacc_build_component_ref (tree obj, tree field)
>  {
> -  tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL);
> +  tree field_type = TREE_TYPE (field);
> +  tree obj_type = TREE_TYPE (obj);
> +  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
> +    field_type = build_qualified_type
> +                     (field_type,
> +                      KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
> +
> +  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
>    if (TREE_THIS_VOLATILE (field))
>      TREE_THIS_VOLATILE (ret) |= 1;
>    if (TREE_READONLY (field))

This code change has been included in the recent master branch commit
e2a58ed6dc5293602d0d168475109caa81ad0f0d "openacc: Middle-end
worker-partitioning support", which thus includes a
'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref' that is
slightly different from 'gcc/omp-low.c:omp_build_component_ref'.

I'm confirming that with this reverted, we're seeing ICEs as follows:

    +FAIL: libgomp.oacc-fortran/gemm-2.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/gemm-2.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/gemm.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/gemm.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/optional-reduction.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/optional-reduction.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/private-variables.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/private-variables.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/reduction-1.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/reduction-1.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/reduction-5.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/reduction-5.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/reduction-6.f90 [...] -foffload=amdgcn-amdhsa  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)

    +FAIL: libgomp.oacc-fortran/reduction-6.f90 [...] -foffload=amdgcn-amdhsa  -O3 -g  (internal compiler error)

Concerning the current 'gcc/omp-low.c:omp_build_component_ref', for the
current set of offloading testcases, we never see a
'!ADDR_SPACE_GENERIC_P' there, so the address space handling doesn't seem
to be necessary there (but also won't do any harm: no-op).

Would it make sense to "Re-unify 'omp_build_component_ref' and
'oacc_build_component_ref'", see attached?


Grüße
 Thomas


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
diff mbox series

Patch

From caee66cf2abd0bea3ee99b460a108ae0d69d599f Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Fri, 30 Jul 2021 16:15:25 +0200
Subject: [PATCH] Re-unify 'omp_build_component_ref' and
 'oacc_build_component_ref'

	gcc/
	* omp-general.c (omp_build_component_ref): New function,
	renamed/moved from...
	* omp-oacc-neuter-broadcast.cc (oacc_build_component_ref):
	... here.
	(build_receiver_ref, build_sender_ref): Update.
	* omp-low.c (omp_build_component_ref): Remove function.
	* omp-general.h (omp_build_component_ref): Declare function.
---
 gcc/omp-general.c                | 21 +++++++++++++++++++++
 gcc/omp-general.h                |  2 ++
 gcc/omp-low.c                    | 15 ---------------
 gcc/omp-oacc-neuter-broadcast.cc | 26 ++------------------------
 4 files changed, 25 insertions(+), 39 deletions(-)

diff --git a/gcc/omp-general.c b/gcc/omp-general.c
index b46a537e281..67a0b752f62 100644
--- a/gcc/omp-general.c
+++ b/gcc/omp-general.c
@@ -2815,4 +2815,25 @@  oacc_get_ifn_dim_arg (const gimple *stmt)
   return (int) axis;
 }
 
+/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
+   as appropriate.  */
+
+tree
+omp_build_component_ref (tree obj, tree field)
+{
+  tree field_type = TREE_TYPE (field);
+  tree obj_type = TREE_TYPE (obj);
+  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
+    field_type
+      = build_qualified_type (field_type,
+			      KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
+
+  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
+  if (TREE_THIS_VOLATILE (field))
+    TREE_THIS_VOLATILE (ret) |= 1;
+  if (TREE_READONLY (field))
+    TREE_READONLY (ret) |= 1;
+  return ret;
+}
+
 #include "gt-omp-general.h"
diff --git a/gcc/omp-general.h b/gcc/omp-general.h
index 5c3e0f0e205..6525175832c 100644
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -145,4 +145,6 @@  get_openacc_privatization_dump_flags ()
   return l_dump_flags;
 }
 
+extern tree omp_build_component_ref (tree obj, tree field);
+
 #endif /* GCC_OMP_GENERAL_H */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 926087da701..1640321c445 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -613,21 +613,6 @@  omp_copy_decl_1 (tree var, omp_context *ctx)
   return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
 }
 
-/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
-   as appropriate.  */
-/* See also 'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref'.  */
-
-static tree
-omp_build_component_ref (tree obj, tree field)
-{
-  tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL);
-  if (TREE_THIS_VOLATILE (field))
-    TREE_THIS_VOLATILE (ret) |= 1;
-  if (TREE_READONLY (field))
-    TREE_READONLY (ret) |= 1;
-  return ret;
-}
-
 /* Build tree nodes to access the field for VAR on the receiver side.  */
 
 static tree
diff --git a/gcc/omp-oacc-neuter-broadcast.cc b/gcc/omp-oacc-neuter-broadcast.cc
index f8555380451..720cf74f12f 100644
--- a/gcc/omp-oacc-neuter-broadcast.cc
+++ b/gcc/omp-oacc-neuter-broadcast.cc
@@ -936,28 +936,6 @@  worker_single_simple (basic_block from, basic_block to,
   update_stmt (acc_bar);
 }
 
-/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
-   as appropriate.  */
-/* Adapted from 'gcc/omp-low.c:omp_build_component_ref'.  */
-
-static tree
-oacc_build_component_ref (tree obj, tree field)
-{
-  tree field_type = TREE_TYPE (field);
-  tree obj_type = TREE_TYPE (obj);
-  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type)))
-    field_type = build_qualified_type
-			(field_type,
-			 KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type)));
-
-  tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL);
-  if (TREE_THIS_VOLATILE (field))
-    TREE_THIS_VOLATILE (ret) |= 1;
-  if (TREE_READONLY (field))
-    TREE_READONLY (ret) |= 1;
-  return ret;
-}
-
 static tree
 build_receiver_ref (tree record_type, tree var, tree receiver_decl)
 {
@@ -965,7 +943,7 @@  build_receiver_ref (tree record_type, tree var, tree receiver_decl)
   tree x = build_simple_mem_ref (receiver_decl);
   tree field = *fields->get (var);
   TREE_THIS_NOTRAP (x) = 1;
-  x = oacc_build_component_ref (x, field);
+  x = omp_build_component_ref (x, field);
   return x;
 }
 
@@ -974,7 +952,7 @@  build_sender_ref (tree record_type, tree var, tree sender_decl)
 {
   field_map_t *fields = *field_map->get (record_type);
   tree field = *fields->get (var);
-  return oacc_build_component_ref (sender_decl, field);
+  return omp_build_component_ref (sender_decl, field);
 }
 
 static int
-- 
2.30.2