===================================================================
@@ -1,3 +1,16 @@
+2015-05-19 Bernd Schmidt <bernds@codesourcery.com>
+
+ * omp-builtins.def (GOACC_thread_broadcast,
+ GOACC_thread_broadcast_ll): New builtins.
+ * optabs.def (oacc_thread_broadcast_optab): New optab.
+ * builtins.c (expand_builtin_oacc_thread_broadcast): New function.
+ (expand_builtin): Use it.
+ * config/nvptx/nvptx.c (nvptx_cannot_copy_insn_p): New function.
+ (TARGET_CANNOT_COPY_INSN_P): Define.
+ * config/nvptx/nvptx.md (UNSPECV_WARP_BCAST): New constant.
+ (oacc_thread_broadcastsi): New pattern.
+ (oacc_thread_broadcastdi): New expander.
+
2015-05-19 Tom de Vries <tom@codesourcery.com>
* omp-low.c (enclosing_target_ctx): Comment out.
===================================================================
@@ -6022,6 +6022,43 @@ expand_oacc_ganglocal_ptr (rtx target AT
return NULL_RTX;
}
+/* Expand a call EXP to the GOACC_thread_broadcast or
+   GOACC_thread_broadcast_ll builtin.  TARGET, if nonnull, is a suggested
+   place for the result.
+
+   Starting from the mode of the argument and moving to successively
+   wider modes, pick the first mode for which the target provides an
+   oacc_thread_broadcast pattern.  If there is none, simply expand and
+   return the argument.  Otherwise emit the pattern and return the
+   register holding its result, or const0_rtx if the pattern's generator
+   produced no insn.  */
+
+static rtx
+expand_builtin_oacc_thread_broadcast (tree exp, rtx target)
+{
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  enum insn_code icode;
+
+  enum machine_mode mode = TYPE_MODE (TREE_TYPE (arg0));
+  gcc_assert (INTEGRAL_MODE_P (mode));
+  /* Look for a handler in the argument's own mode first, then in each
+     wider mode in turn.  */
+  do
+    {
+      icode = direct_optab_handler (oacc_thread_broadcast_optab, mode);
+      mode = GET_MODE_WIDER_MODE (mode);
+    }
+  while (icode == CODE_FOR_nothing && mode != VOIDmode);
+  /* No broadcast pattern on this target: the value is passed through.  */
+  if (icode == CODE_FOR_nothing)
+    return expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+  rtx tmp = target;
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+  machine_mode mode1 = insn_data[icode].operand[1].mode;
+  /* TARGET is only a suggestion and may be null, so test it before
+     inspecting it.  Use a fresh pseudo unless TARGET is already a
+     register of the mode the pattern produces.  */
+  if (tmp == NULL_RTX || !REG_P (tmp) || GET_MODE (tmp) != mode0)
+    tmp = gen_reg_rtx (mode0);
+  rtx op1 = expand_expr (arg0, NULL_RTX, mode1, EXPAND_NORMAL);
+  if (GET_MODE (op1) != mode1)
+    op1 = convert_to_mode (mode1, op1, 0);
+
+  /* The argument may have expanded to a constant or a memory reference;
+     the broadcast patterns take a register input, so load it into one.  */
+  op1 = force_reg (mode1, op1);
+
+  rtx insn = GEN_FCN (icode) (tmp, op1);
+  if (insn != NULL_RTX)
+    {
+      emit_insn (insn);
+      return tmp;
+    }
+  return const0_rtx;
+}
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
@@ -7177,6 +7214,10 @@ expand_builtin (tree exp, rtx target, rt
return target;
break;
+ case BUILT_IN_GOACC_THREAD_BROADCAST:
+ case BUILT_IN_GOACC_THREAD_BROADCAST_LL:
+ return expand_builtin_oacc_thread_broadcast (exp, target);
+
default: /* just do library call, if unknown builtin */
break;
}
===================================================================
@@ -2029,6 +2029,15 @@ nvptx_vector_alignment (const_tree type)
return MIN (align, BIGGEST_ALIGNMENT);
}
+
+/* Implement TARGET_CANNOT_COPY_INSN_P.  Return true if INSN is recognized
+   as the oacc_thread_broadcastsi pattern, and false for every other
+   insn.
+   NOTE(review): presumably the broadcast must not be duplicated because
+   all threads of a warp have to execute the same instance of it --
+   confirm.  */
+
+static bool
+nvptx_cannot_copy_insn_p (rtx_insn *insn)
+{
+  return recog_memoized (insn) == CODE_FOR_oacc_thread_broadcastsi;
+}
+
/* Record a symbol for mkoffload to enter into the mapping table. */
@@ -2153,6 +2162,9 @@ nvptx_file_end (void)
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment
+#undef TARGET_CANNOT_COPY_INSN_P
+#define TARGET_CANNOT_COPY_INSN_P nvptx_cannot_copy_insn_p
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-nvptx.h"
===================================================================
@@ -61,6 +61,7 @@ (define_c_enum "unspecv" [
UNSPECV_LOCK
UNSPECV_CAS
UNSPECV_XCHG
+ UNSPECV_WARP_BCAST
])
(define_attr "subregs_ok" "false,true"
@@ -1322,6 +1323,37 @@ (define_expand "oacc_ctaid"
FAIL;
})
+;; 32-bit thread broadcast.  Operand 1 is the SImode register input and
+;; operand 0 the SImode register result; the insn is output as a single
+;; predicated "shfl.idx.b32 %0, %1, 0, 31".
+;; NOTE(review): the trailing 0 and 31 are presumably the source lane and
+;; the clamp value, i.e. every thread of the warp receives lane 0's value
+;; -- confirm against the PTX ISA description of shfl.
+(define_insn "oacc_thread_broadcastsi"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "")
+ (unspec_volatile:SI [(match_operand:SI 1 "nvptx_register_operand" "")]
+ UNSPECV_WARP_BCAST))]
+ ""
+ "%.\\tshfl.idx.b32\\t%0, %1, 0, 31;")
+
+;; 64-bit thread broadcast, built from two 32-bit broadcasts: operand 1 is
+;; split into its high and low SImode halves, each half is passed through
+;; oacc_thread_broadcastsi, and the two results are recombined into
+;; operand 0 as (high << 32) | low.
+(define_expand "oacc_thread_broadcastdi"
+  [(set (match_operand:DI 0 "nvptx_register_operand" "")
+        (unspec_volatile:DI [(match_operand:DI 1 "nvptx_register_operand" "")]
+                            UNSPECV_WARP_BCAST))]
+  ""
+{
+  /* Bring the high 32 bits of the input down into the low half.  */
+  rtx t = gen_reg_rtx (DImode);
+  emit_insn (gen_lshrdi3 (t, operands[1], GEN_INT (32)));
+
+  /* op0 is the high half of the input, op1 the low half.  */
+  rtx op0 = force_reg (SImode, gen_lowpart (SImode, t));
+  rtx op1 = force_reg (SImode, gen_lowpart (SImode, operands[1]));
+
+  /* Broadcast each half separately.  */
+  rtx targ0 = gen_reg_rtx (SImode);
+  rtx targ1 = gen_reg_rtx (SImode);
+  emit_insn (gen_oacc_thread_broadcastsi (targ0, op0));
+  emit_insn (gen_oacc_thread_broadcastsi (targ1, op1));
+
+  /* Widen both halves back to DImode.  The kind of extension does not
+     matter for the high half, since its upper bits are shifted out
+     below.  The low half must be zero-extended: a sign extension would
+     fill bits 32..63 with ones whenever bit 31 is set, and the IOR
+     below would then clobber the high half of the result.  */
+  rtx t2 = gen_reg_rtx (DImode);
+  rtx t3 = gen_reg_rtx (DImode);
+  emit_insn (gen_extendsidi2 (t2, targ0));
+  emit_insn (gen_zero_extendsidi2 (t3, targ1));
+
+  /* Move the high half back into place and merge in the low half.  */
+  rtx t4 = gen_reg_rtx (DImode);
+  emit_insn (gen_ashldi3 (t4, t2, GEN_INT (32)));
+  emit_insn (gen_iordi3 (operands[0], t3, t4));
+  DONE;
+})
+
(define_insn "ganglocal_ptr<mode>"
[(set (match_operand:P 0 "nvptx_register_operand" "")
(unspec:P [(const_int 0)] UNSPEC_SHARED_DATA))]
===================================================================
@@ -1,3 +1,7 @@
+2015-05-19 Bernd Schmidt <bernds@codesourcery.com>
+
+ * types.def (BT_FN_ULONGLONG_ULONGLONG): Define.
+
2015-05-13 Cesar Philippidis <cesar@codesourcery.com>
* f95-lang.c (gfc_attribute_table): Add and "oacc function"
===================================================================
@@ -84,6 +84,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_VOID_PTRPTR,
DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VPTR, BT_VOID, BT_VOLATILE_PTR)
DEF_FUNCTION_TYPE_1 (BT_FN_INT_INT, BT_INT, BT_INT)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT_UINT, BT_UINT, BT_UINT)
+DEF_FUNCTION_TYPE_1 (BT_FN_ULONGLONG_ULONGLONG, BT_ULONGLONG, BT_ULONGLONG)
DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT)
DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT)
===================================================================
@@ -77,6 +77,10 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_GA
BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST)
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr",
BT_FN_PTR_PTR, ATTR_CONST_NOTHROW_LEAF_LIST)
+/* Thread broadcast builtins: one taking and returning an unsigned int
+   (BT_FN_UINT_UINT), and an _ll variant taking and returning an
+   unsigned long long (BT_FN_ULONGLONG_ULONGLONG).  */
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_THREAD_BROADCAST, "GOACC_thread_broadcast",
+ BT_FN_UINT_UINT, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_THREAD_BROADCAST_LL, "GOACC_thread_broadcast_ll",
+ BT_FN_ULONGLONG_ULONGLONG, ATTR_NOTHROW_LEAF_LIST)
DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device",
BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
===================================================================
@@ -332,3 +332,5 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I
OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
+
+OPTAB_D (oacc_thread_broadcast_optab, "oacc_thread_broadcast$I$a")