@@ -15075,6 +15075,190 @@ make_pass_diagnose_omp_blocks (gcc::context *ctxt)
{
return new pass_diagnose_omp_blocks (ctxt);
}
+
+/* Provide transformation required for using unified shared memory
+ by replacing calls to standard memory allocation functions with
+ function provided by the libgomp. */
+
+static tree
+usm_transform (gimple_stmt_iterator *gsi_p, bool *,
+ struct walk_stmt_info *wi)
+{
+ gimple *stmt = gsi_stmt (*gsi_p);
+ /* ompx_gnu_unified_shared_mem_alloc is 201.
+ This must match the definition in libgomp/omp.h.in. */
+ const unsigned int unified_shared_mem_alloc = 201;
+
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_CALL:
+ {
+ gcall *gs = as_a <gcall *> (stmt);
+ tree fndecl = gimple_call_fndecl (gs);
+ unsigned int args = gimple_call_num_args (gs);
+ if (fndecl)
+ {
+ tree allocator = build_int_cst (pointer_sized_int_node,
+ unified_shared_mem_alloc);
+ const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
+ if ((strcmp (name, "malloc") == 0)
+ || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MALLOC)
+ || (DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl)
+ && args == 1)
+ || strcmp (name, "omp_target_alloc") == 0)
+ {
+ tree omp_alloc_type
+ = build_function_type_list (ptr_type_node, size_type_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+ tree repl = build_fn_decl ("omp_alloc", omp_alloc_type);
+ tree size = gimple_call_arg (gs, 0);
+ gimple *g = gimple_build_call (repl, 2, size, allocator);
+ gimple_call_set_lhs (g, gimple_call_lhs (gs));
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi_p, g, true);
+ }
+ else if ((strcmp (name, "aligned_alloc") == 0)
+ || (DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl)
+ && args == 2))
+ {
+ /* May be we can also use this for new operator with
+ std::align_val_t parameter. */
+ tree omp_alloc_type
+ = build_function_type_list (ptr_type_node, size_type_node,
+ size_type_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+ tree repl = build_fn_decl ("omp_aligned_alloc",
+ omp_alloc_type);
+ int align_arg
+ = DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl) ? 1: 0;
+ int size_arg
+ = DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl) ? 0: 1;
+ tree align = gimple_call_arg (gs, align_arg);
+ tree size = gimple_call_arg (gs, size_arg);
+ gimple *g = gimple_build_call (repl, 3, align, size,
+ allocator);
+ gimple_call_set_lhs (g, gimple_call_lhs (gs));
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi_p, g, true);
+ }
+ else if ((strcmp (name, "calloc") == 0)
+ || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CALLOC))
+ {
+ tree omp_calloc_type
+ = build_function_type_list (ptr_type_node, size_type_node,
+ size_type_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+ tree repl = build_fn_decl ("omp_calloc", omp_calloc_type);
+ tree num = gimple_call_arg (gs, 0);
+ tree size = gimple_call_arg (gs, 1);
+ gimple *g = gimple_build_call (repl, 3, num, size, allocator);
+ gimple_call_set_lhs (g, gimple_call_lhs (gs));
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi_p, g, true);
+ }
+ else if ((strcmp (name, "realloc") == 0)
+ || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_REALLOC))
+ {
+ tree omp_realloc_type
+ = build_function_type_list (ptr_type_node, ptr_type_node,
+ size_type_node,
+ pointer_sized_int_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+ tree repl = build_fn_decl ("omp_realloc", omp_realloc_type);
+ tree ptr = gimple_call_arg (gs, 0);
+ tree size = gimple_call_arg (gs, 1);
+ gimple *g = gimple_build_call (repl, 4, ptr, size, allocator,
+ allocator);
+ gimple_call_set_lhs (g, gimple_call_lhs (gs));
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi_p, g, true);
+ }
+ else if ((strcmp (name, "free") == 0)
+ || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_FREE)
+ || (DECL_IS_OPERATOR_DELETE_P (fndecl)
+ && DECL_IS_REPLACEABLE_OPERATOR (fndecl))
+ || strcmp (name, "omp_target_free") == 0)
+ {
+ tree omp_free_type
+ = build_function_type_list (void_type_node, ptr_type_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+ tree repl = build_fn_decl ("omp_free", omp_free_type);
+ tree ptr = gimple_call_arg (gs, 0);
+ gimple *g = gimple_build_call (repl, 2, ptr, allocator);
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi_p, g, true);
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
+namespace {
+
+const pass_data pass_data_usm_transform =
+{
+ GIMPLE_PASS, /* type */
+ "usm_transform", /* name */
+ OPTGROUP_OMP, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_gimple_any, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_usm_transform : public gimple_opt_pass
+{
+public:
+ pass_usm_transform (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_usm_transform, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return (flag_openmp || flag_openmp_simd)
+ /* Note: flag_offload_memory only sets omp_requires_mask if a
+ target directive is present, so we do need to check both. */
+ && ((omp_requires_mask & OMP_REQUIRES_UNIFIED_SHARED_MEMORY)
+ || flag_offload_memory == OFFLOAD_MEMORY_UNIFIED);
+ }
+ virtual unsigned int execute (function *)
+ {
+ struct walk_stmt_info wi;
+ gimple_seq body = gimple_body (current_function_decl);
+
+ memset (&wi, 0, sizeof (wi));
+ walk_gimple_seq (body, usm_transform, NULL, &wi);
+
+ return 0;
+ }
+
+}; // class pass_usm_transform
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_usm_transform (gcc::context *ctxt)
+{
+ return new pass_usm_transform (ctxt);
+}
#include "gt-omp-low.h"
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_diagnose_tm_blocks);
NEXT_PASS (pass_omp_oacc_kernels_decompose);
NEXT_PASS (pass_lower_omp);
+ NEXT_PASS (pass_usm_transform);
NEXT_PASS (pass_lower_cf);
NEXT_PASS (pass_lower_tm);
NEXT_PASS (pass_refactor_eh);
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-tree-usm_transform" } */
+
+#pragma omp requires unified_shared_memory
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *malloc (__SIZE_TYPE__);
+void *aligned_alloc (__SIZE_TYPE__, __SIZE_TYPE__);
+void *calloc(__SIZE_TYPE__, __SIZE_TYPE__);
+void *realloc(void *, __SIZE_TYPE__);
+void free (void *);
+void *omp_target_alloc (__SIZE_TYPE__, int);
+void omp_target_free (void *, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+void
+foo ()
+{
+ void *p1 = malloc(20);
+ void *p2 = realloc(p1, 30);
+ void *p3 = calloc(4, 15);
+ void *p4 = aligned_alloc(16, 40);
+ void *p5 = omp_target_alloc(50, 1);
+ free (p2);
+ free (p3);
+ free (p4);
+ omp_target_free (p5, 1);
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_realloc \\(.*, 30, 201, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_calloc \\(4, 15, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(16, 40, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(50, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " free" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " aligned_alloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " malloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " omp_target_alloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " omp_target_free" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-foffload-memory=unified -fdump-tree-usm_transform" } */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *malloc (__SIZE_TYPE__);
+void *aligned_alloc (__SIZE_TYPE__, __SIZE_TYPE__);
+void *calloc(__SIZE_TYPE__, __SIZE_TYPE__);
+void *realloc(void *, __SIZE_TYPE__);
+void free (void *);
+void *omp_target_alloc (__SIZE_TYPE__, int);
+void omp_target_free (void *, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+void
+foo ()
+{
+ void *p1 = malloc(20);
+ void *p2 = realloc(p1, 30);
+ void *p3 = calloc(4, 15);
+ void *p4 = aligned_alloc(16, 40);
+ void *p5 = omp_target_alloc(50, 1);
+ free (p2);
+ free (p3);
+ free (p4);
+ omp_target_free (p5, 1);
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_realloc \\(.*, 30, 201, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_calloc \\(4, 15, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(16, 40, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(50, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " free" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " aligned_alloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " malloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " omp_target_alloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " omp_target_free" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+// { dg-do compile }
+// { dg-options "-fopenmp -fdump-tree-usm_transform" }
+
+#pragma omp requires unified_shared_memory
+
+struct t1
+{
+ int a;
+ int b;
+};
+
+typedef unsigned char uint8_t;
+
+void
+foo (__SIZE_TYPE__ x, __SIZE_TYPE__ y)
+{
+ uint8_t *p1 = new uint8_t;
+ uint8_t *p2 = new uint8_t[20];
+ t1 *p3 = new t1;
+ t1 *p4 = new t1[y];
+ delete p1;
+ delete p3;
+ delete [] p2;
+ delete [] p4;
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(1, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator new" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator delete" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,30 @@
+// { dg-do compile }
+// { dg-options "-fopenmp -foffload-memory=unified -fdump-tree-usm_transform" }
+
+struct t1
+{
+ int a;
+ int b;
+};
+
+typedef unsigned char uint8_t;
+
+void
+foo (__SIZE_TYPE__ x, __SIZE_TYPE__ y)
+{
+ uint8_t *p1 = new uint8_t;
+ uint8_t *p2 = new uint8_t[20];
+ t1 *p3 = new t1;
+ t1 *p4 = new t1[y];
+ delete p1;
+ delete p3;
+ delete [] p2;
+ delete [] p4;
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(1, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator new" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator delete" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,38 @@
+// { dg-do compile }
+// { dg-options "-fopenmp -fdump-tree-usm_transform" }
+
+#pragma omp requires unified_shared_memory
+
+#include <new>
+
+
+struct X {
+ static void* operator new(std::size_t count)
+ {
+ static char buf[10];
+ return &buf[0];
+ }
+ static void* operator new[](std::size_t count)
+ {
+ static char buf[10];
+ return &buf[0];
+ }
+ static void operator delete(void*)
+ {
+ }
+ static void operator delete[](void*)
+ {
+ }
+};
+void foo() {
+ X* p1 = new X;
+ delete p1;
+ X* p2 = new X[10];
+ delete[] p2;
+ unsigned char buf[24] ;
+ int *p3 = new (buf) int(3);
+ p3[0] = 1;
+}
+
+/* { dg-final { scan-tree-dump-not "omp_alloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "omp_free" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+// { dg-do compile { target c++17 } }
+// { dg-options "-fopenmp -fdump-tree-usm_transform" }
+
+#pragma omp requires unified_shared_memory
+
+struct t1
+{
+ int a;
+ int b;
+};
+
+typedef unsigned char uint8_t;
+
+void
+foo (__SIZE_TYPE__ x, __SIZE_TYPE__ y)
+{
+ uint8_t *p1 = new (std::align_val_t(128)) uint8_t;
+ uint8_t *p2 = new (std::align_val_t(128)) uint8_t[40];
+ t1 *p3 = new (std::align_val_t(128)) t1;
+ t1 *p4 = new (std::align_val_t(128)) t1[y];
+ delete p1;
+ delete p3;
+ delete [] p2;
+ delete [] p4;
+}
+
+/* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(128, 1, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(128, 40, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_aligned_alloc" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator new" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator delete" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,30 @@
+// { dg-do compile { target c++17 } }
+// { dg-options "-fopenmp -foffload-memory=unified -fdump-tree-usm_transform" }
+
+struct t1
+{
+ int a;
+ int b;
+};
+
+typedef unsigned char uint8_t;
+
+void
+foo (__SIZE_TYPE__ x, __SIZE_TYPE__ y)
+{
+ uint8_t *p1 = new (std::align_val_t(128)) uint8_t;
+ uint8_t *p2 = new (std::align_val_t(128)) uint8_t[40];
+ t1 *p3 = new (std::align_val_t(128)) t1;
+ t1 *p4 = new (std::align_val_t(128)) t1[y];
+ delete p1;
+ delete p3;
+ delete [] p2;
+ delete [] p4;
+}
+
+/* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(128, 1, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(128, 40, 201\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_aligned_alloc" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator new" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator delete" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,16 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-usm_transform" }
+
+!$omp requires unified_shared_memory
+end
+
+subroutine foo()
+ implicit none
+ integer, allocatable :: var1
+
+ allocate(var1)
+
+end subroutine
+
+! { dg-final { scan-tree-dump-times "omp_alloc" 1 "usm_transform" } }
+! { dg-final { scan-tree-dump-times "omp_free" 1 "usm_transform" } }
\ No newline at end of file
new file mode 100644
@@ -0,0 +1,13 @@
+! { dg-do compile }
+! { dg-additional-options "-foffload-memory=unified -fdump-tree-usm_transform" }
+
+subroutine foo()
+ implicit none
+ integer, allocatable :: var1
+
+ allocate(var1)
+
+end subroutine
+
+! { dg-final { scan-tree-dump-times "omp_alloc" 1 "usm_transform" } }
+! { dg-final { scan-tree-dump-times "omp_free" 1 "usm_transform" } }
\ No newline at end of file
@@ -431,6 +431,7 @@ extern gimple_opt_pass *make_pass_lower_vector_ssa (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_usm_transform (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt);
new file mode 100644
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */
+#include <stdint.h>
+
+#pragma omp requires unified_shared_memory
+
+int g1 = 0;
+
+struct s1
+{
+ s1() { a = g1++;}
+ ~s1() { g1--;}
+ int a;
+};
+
+int
+main ()
+{
+ s1 *p1 = new s1;
+ s1 *p2 = new s1[10];
+
+ if (!p1 || !p2 || p1->a != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i < 10; i++)
+ if (p2[i].a != i+1)
+ __builtin_abort ();
+
+ uintptr_t pp1 = (uintptr_t)p1;
+ uintptr_t pp2 = (uintptr_t)p2;
+
+#pragma omp target firstprivate(pp1, pp2)
+ {
+ s1 *t1 = (s1*)pp1;
+ s1 *t2 = (s1*)pp2;
+ if (t1->a != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i < 10; i++)
+ if (t2[i].a != i+1)
+ __builtin_abort ();
+
+ t1->a = 42;
+ }
+
+ if (p1->a != 42)
+ __builtin_abort ();
+
+ delete [] p2;
+ delete p1;
+ if (g1 != 0)
+ __builtin_abort ();
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-additional-options "-std=c++17" } */
+/* { dg-require-effective-target omp_usm } */
+#include <stdint.h>
+
+#pragma omp requires unified_shared_memory
+
+struct s1
+{
+ int a;
+};
+
+int
+main ()
+{
+ s1 *p1 = new s1;
+ s1 *p2 = new s1[10];
+
+ if (!p1 || !p2)
+ __builtin_abort ();
+
+ uintptr_t pp1 = (uintptr_t)p1;
+ uintptr_t pp2 = (uintptr_t)p2;
+ if (pp1 & 0x7f != 0)
+ __builtin_abort ();
+
+ if (pp2 & 0x7f != 0)
+ __builtin_abort ();
+
+ delete [] p2;
+ delete p1;
+ return 0;
+}
@@ -3,6 +3,8 @@
/* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */
/* { dg-additional-sources requires-4-aux.c } */
+/* { dg-xfail-run-if "USM via -foffload-memory=... does not support static variables" { offload_device_nvptx || offload_device_gcn } } */
+
/* Check no diagnostic by device-compiler's or host compiler's lto1.
Other file uses: 'requires reverse_offload', but that's inactive as
there are no declare target directives, device constructs nor device routines */
new file mode 100644
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <omp.h>
+
+/* On old systems, the declaration may not be present in stdlib.h which
+ will generate a warning. This function is going to be replaced with
+ omp_aligned_alloc so the purpose of this declaration is to avoid that
+ warning. */
+void *aligned_alloc(size_t alignment, size_t size);
+
+#pragma omp requires unified_shared_memory
+
+int
+main ()
+{
+ int *a = (int *) malloc(sizeof(int)*2);
+ int *b = (int *) calloc(sizeof(int), 3);
+ int *c = (int *) realloc(NULL, sizeof(int) * 4);
+ int *d = (int *) aligned_alloc(32, sizeof(int));
+ int *e = (int *) omp_target_alloc(sizeof(int), 1);
+ if (!a || !b || !c || !d || !e)
+ /* If this fails, check the definition of ompx_gnu_unified_shared_mem_alloc
+ in omp-low.cc matches the definition in omp.h.in. */
+ __builtin_abort ();
+
+ a[0] = 42;
+ a[1] = 43;
+ b[0] = 52;
+ b[1] = 53;
+ b[2] = 54;
+ c[0] = 62;
+ c[1] = 63;
+ c[2] = 64;
+ c[3] = 65;
+
+ uintptr_t a_p = (uintptr_t)a;
+ uintptr_t b_p = (uintptr_t)b;
+ uintptr_t c_p = (uintptr_t)c;
+ uintptr_t d_p = (uintptr_t)d;
+ uintptr_t e_p = (uintptr_t)e;
+
+ if (d_p & 31 != 0)
+ __builtin_abort ();
+
+#pragma omp target enter data map(to:a[0:2])
+
+#pragma omp target is_device_ptr(c)
+ {
+ if (a[0] != 42 || a_p != (uintptr_t)a)
+ __builtin_abort ();
+ if (b[0] != 52 || b[2] != 54 || b_p != (uintptr_t)b)
+ __builtin_abort ();
+ if (c[0] != 62 || c[3] != 65 || c_p != (uintptr_t)c)
+ __builtin_abort ();
+ if (d_p != (uintptr_t)d)
+ __builtin_abort ();
+ if (e_p != (uintptr_t)e)
+ __builtin_abort ();
+ a[0] = 72;
+ b[0] = 82;
+ c[0] = 92;
+ e[0] = 102;
+ }
+
+#pragma omp target
+ {
+ if (a[1] != 43 || a_p != (uintptr_t)a)
+ __builtin_abort ();
+ if (b[1] != 53 || b_p != (uintptr_t)b)
+ __builtin_abort ();
+ if (c[1] != 63 || c[2] != 64 || c_p != (uintptr_t)c)
+ __builtin_abort ();
+ a[1] = 73;
+ b[1] = 83;
+ c[1] = 93;
+ }
+
+#pragma omp target exit data map(delete:a[0:2])
+
+ if (a[0] != 72 || a[1] != 73
+ || b[0] != 82 || b[1] != 83
+ || c[0] != 92 || c[1] != 93
+ || e[0] != 102)
+ __builtin_abort ();
+ free(a);
+ free(b);
+ free(c);
+ omp_target_free(e, 1);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,33 @@
+! { dg-do run }
+! { dg-require-effective-target omp_usm }
+
+! Ensure that USM doesn't break the use_device_ptr clause (host pointers and
+! target pointers being "unified").
+
+program usm
+ use iso_fortran_env
+ use omp_lib
+ implicit none
+
+ !$omp requires unified_shared_memory
+
+ integer, parameter :: N = 1024
+ real(real64), allocatable :: x(:), y(:)
+ integer :: i
+
+ allocate(x(N),y(N))
+
+ !$omp target data map(x)
+ ! The "i" variable is not explictly mapped yet, so ensures that both
+ ! mapping scan passes are tested.
+ !$omp target data map(i) use_device_ptr(x)
+ !$omp target teams distribute parallel do simd
+ do i=1,N
+ y(i) = x(i)
+ enddo
+ !$omp end target data
+ !$omp end target data
+
+ deallocate(x,y)
+
+end program usm
From: Hafiz Abid Qadeer <abidh@codesourcery.com> This patches changes calls to malloc/free/calloc/realloc and operator new to memory allocation functions in libgomp with allocator=ompx_unified_shared_mem_alloc. This helps existing code to benefit from the unified shared memory, and is necessary to implement "requires unified_shared_memory" using managed memory. The libgomp does the correct thing with all the mapping constructs and there are no memory copies if the pointer is pointing to unified shared memory. We only replace the standard new operator and not the class member or placement new. gcc/ChangeLog: * omp-low.cc (usm_transform): New function. (pass_data_usm_transform): New. (class pass_usm_transform): New. (make_pass_usm_transform): New function. * passes.def: Add pass_usm_transform pass. * tree-pass.h (make_pass_usm_transform): New prototype. libgomp/ChangeLog: * testsuite/libgomp.c-c++-common/requires-4.c: Add xfail. * testsuite/libgomp.c++/usm-1.C: New test. * testsuite/libgomp.c++/usm-2.C: New test. * testsuite/libgomp.c/usm-6.c: New test. * testsuite/libgomp.fortran/usm-2.f90: New test. gcc/testsuite/ChangeLog: * c-c++-common/gomp/usm-2.c: New test. * c-c++-common/gomp/usm-3.c: New test. * g++.dg/gomp/usm-1.C: New test. * g++.dg/gomp/usm-2.C: New test. * g++.dg/gomp/usm-3.C: New test. * g++.dg/gomp/usm-4.C: New test. * g++.dg/gomp/usm-5.C: New test. * gfortran.dg/gomp/usm-2.f90: New test. * gfortran.dg/gomp/usm-3.f90: New test. co-authored-by: Andrew Stubbs <ams@baylibre.com> --- gcc/omp-low.cc | 184 ++++++++++++++++++ gcc/passes.def | 1 + gcc/testsuite/c-c++-common/gomp/usm-2.c | 46 +++++ gcc/testsuite/c-c++-common/gomp/usm-3.c | 44 +++++ gcc/testsuite/g++.dg/gomp/usm-1.C | 32 +++ gcc/testsuite/g++.dg/gomp/usm-2.C | 30 +++ gcc/testsuite/g++.dg/gomp/usm-3.C | 38 ++++ gcc/testsuite/g++.dg/gomp/usm-4.C | 32 +++ gcc/testsuite/g++.dg/gomp/usm-5.C | 30 +++ gcc/testsuite/gfortran.dg/gomp/usm-2.f90 | 16 ++ gcc/testsuite/gfortran.dg/gomp/usm-3.f90 | 13 ++ gcc/tree-pass.h | 1 + libgomp/testsuite/libgomp.c++/usm-1.C | 54 +++++ libgomp/testsuite/libgomp.c++/usm-2.C | 33 ++++ .../libgomp.c-c++-common/requires-4.c | 2 + libgomp/testsuite/libgomp.c/usm-6.c | 94 +++++++++ libgomp/testsuite/libgomp.fortran/usm-2.f90 | 33 ++++ 17 files changed, 683 insertions(+) create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-2.c create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-3.c create mode 100644 gcc/testsuite/g++.dg/gomp/usm-1.C create mode 100644 gcc/testsuite/g++.dg/gomp/usm-2.C create mode 100644 gcc/testsuite/g++.dg/gomp/usm-3.C create mode 100644 gcc/testsuite/g++.dg/gomp/usm-4.C create mode 100644 gcc/testsuite/g++.dg/gomp/usm-5.C create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-2.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-3.f90 create mode 100644 libgomp/testsuite/libgomp.c++/usm-1.C create mode 100644 libgomp/testsuite/libgomp.c++/usm-2.C create mode 100644 libgomp/testsuite/libgomp.c/usm-6.c create mode 100644 libgomp/testsuite/libgomp.fortran/usm-2.f90