diff mbox

[hsa] Implement a number of atomic builtins

Message ID 20150903144955.GQ2685@virgil.suse.cz
State New
Headers show

Commit Message

Martin Jambor Sept. 3, 2015, 2:49 p.m. UTC
Hi,

The patch below implements expansion of a number of atomic builtin
calls into HSA atomic instructions.  Committed to the branch.

Thanks,

Martin


2015-09-03  Martin Jambor  <mjambor@suse.cz>

	* hsa-gen.c (gen_hsa_ternary_atomic_for_builtin): New function.
	(gen_hsa_insns_for_call): Use it to implement appropriate builtin
	calls.
---
 gcc/ChangeLog.hsa |   6 ++
 gcc/hsa-gen.c     | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 209 insertions(+), 2 deletions(-)
diff mbox

Patch

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index cbfc75a..9d569fe 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -3287,7 +3287,8 @@  gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call)
 /* Helper functions to create a single unary HSA operations out of calls to
    builtins.  OPCODE is the HSA operation to be generated.  STMT is a gimple
    call to a builtin.  HBB is the HSA BB to which the instruction should be
-   added and SSA_MAP is used to map gimple SSA names to HSA pseudoreisters.  */
+   added and SSA_MAP is used to map gimple SSA names to HSA
+   pseudoregisters.  */
 
 static void
 gen_hsa_unaryop_for_builtin (int opcode, gimple stmt, hsa_bb *hbb,
@@ -3304,6 +3305,97 @@  gen_hsa_unaryop_for_builtin (int opcode, gimple stmt, hsa_bb *hbb,
   gen_hsa_unary_operation (opcode, dest, op, hbb);
 }
 
+/* Helper function to create an HSA atomic binary operation instruction out of
+   calls to atomic builtins.  RET_ORIG is true if the built-in is the variant
+   that return s the value before applying operation, and false if it should
+   return the value after applying the operation (if it returns value at all).
+   ACODE is the atomic operation code, STMT is a gimple call to a builtin.  HBB
+   is the HSA BB to which the instruction should be added and SSA_MAP is used
+   to map gimple SSA names to HSA pseudoregisters.*/
+
+static void
+gen_hsa_ternary_atomic_for_builtin (bool ret_orig,
+ 				    enum BrigAtomicOperation acode, gimple stmt,
+				    hsa_bb *hbb, vec <hsa_op_reg_p> *ssa_map)
+{
+  tree lhs = gimple_call_lhs (stmt);
+
+  tree type = TREE_TYPE (gimple_call_arg (stmt, 1));
+  BrigType16_t hsa_type  = hsa_type_for_scalar_tree_type (type, false);
+  BrigType16_t bit_type  = hsa_bittype_for_type (hsa_type);
+
+  hsa_op_reg *dest;
+  int nops, opcode;
+  if (lhs)
+    {
+      if (ret_orig)
+	dest = hsa_reg_for_gimple_ssa (lhs, ssa_map);
+      else
+	dest = new hsa_op_reg (hsa_type);
+      opcode = BRIG_OPCODE_ATOMIC;
+      nops = 3;
+    }
+  else
+    {
+      dest = NULL;
+      opcode = BRIG_OPCODE_ATOMICNORET;
+      nops = 2;
+    }
+
+  hsa_insn_atomic *atominsn = new hsa_insn_atomic (nops, opcode, acode,
+						   bit_type);
+  hsa_op_address *addr;
+  addr = gen_hsa_addr (gimple_call_arg (stmt, 0), hbb, ssa_map);
+  hsa_op_base *op = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 1),
+						    hbb, ssa_map, NULL);
+
+  if (lhs)
+    {
+      atominsn->set_op (0, dest);
+      atominsn->set_op (1, addr);
+      atominsn->set_op (2, op);
+    }
+  else
+    {
+      atominsn->set_op (0, addr);
+      atominsn->set_op (1, op);
+    }
+  /* FIXME: Perhaps select a more relaxed memory model based on the last
+     argument of the buildin call.  */
+
+  hbb->append_insn (atominsn);
+
+  /* HSA does not natively support the variants that return the modified value,
+     so re-do the operation again non-atomically if that is what was
+     requested.  */
+  if (lhs && !ret_orig)
+    {
+      int arith;
+      switch (acode)
+	{
+	case BRIG_ATOMIC_ADD:
+	  arith = BRIG_OPCODE_ADD;
+	  break;
+	case BRIG_ATOMIC_AND:
+	  arith = BRIG_OPCODE_AND;
+	  break;
+	case BRIG_ATOMIC_OR:
+	  arith = BRIG_OPCODE_OR;
+	  break;
+	case BRIG_ATOMIC_SUB:
+	  arith = BRIG_OPCODE_SUB;
+	  break;
+	case BRIG_ATOMIC_XOR:
+	  arith = BRIG_OPCODE_XOR;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+      hsa_op_reg *real_dest = dest = hsa_reg_for_gimple_ssa (lhs, ssa_map);
+      gen_hsa_binary_operation (arith, real_dest, dest, op, hbb);
+    }
+}
+
 /* Generate HSA instructions for the given call statement STMT.  Instructions
    will be appended to HBB.  SSA_MAP maps gimple SSA names to HSA pseudo
    registers.  */
@@ -3440,7 +3532,6 @@  specialop:
     case BUILT_IN_ATOMIC_LOAD_8:
     case BUILT_IN_ATOMIC_LOAD_16:
       {
-	/* XXX Ignore mem model for now.  */
 	BrigType16_t mtype = mem_type_for_type (hsa_type_for_scalar_tree_type
 						(TREE_TYPE (lhs), false));
 	hsa_op_address *addr = gen_hsa_addr (gimple_call_arg (stmt, 0),
@@ -3452,10 +3543,120 @@  specialop:
 	atominsn->set_op (0, dest);
 	atominsn->set_op (1, addr);
 	atominsn->memoryorder = BRIG_MEMORY_ORDER_SC_ACQUIRE;
+
 	hbb->append_insn (atominsn);
 	break;
       }
 
+    case BUILT_IN_ATOMIC_EXCHANGE_1:
+    case BUILT_IN_ATOMIC_EXCHANGE_2:
+    case BUILT_IN_ATOMIC_EXCHANGE_4:
+    case BUILT_IN_ATOMIC_EXCHANGE_8:
+    case BUILT_IN_ATOMIC_EXCHANGE_16:
+      gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_EXCH, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_ADD_1:
+    case BUILT_IN_ATOMIC_FETCH_ADD_2:
+    case BUILT_IN_ATOMIC_FETCH_ADD_4:
+    case BUILT_IN_ATOMIC_FETCH_ADD_8:
+    case BUILT_IN_ATOMIC_FETCH_ADD_16:
+      gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_ADD, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_SUB_1:
+    case BUILT_IN_ATOMIC_FETCH_SUB_2:
+    case BUILT_IN_ATOMIC_FETCH_SUB_4:
+    case BUILT_IN_ATOMIC_FETCH_SUB_8:
+    case BUILT_IN_ATOMIC_FETCH_SUB_16:
+      gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_SUB, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_AND_1:
+    case BUILT_IN_ATOMIC_FETCH_AND_2:
+    case BUILT_IN_ATOMIC_FETCH_AND_4:
+    case BUILT_IN_ATOMIC_FETCH_AND_8:
+    case BUILT_IN_ATOMIC_FETCH_AND_16:
+      gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_AND, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_XOR_1:
+    case BUILT_IN_ATOMIC_FETCH_XOR_2:
+    case BUILT_IN_ATOMIC_FETCH_XOR_4:
+    case BUILT_IN_ATOMIC_FETCH_XOR_8:
+    case BUILT_IN_ATOMIC_FETCH_XOR_16:
+      gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_XOR, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_OR_1:
+    case BUILT_IN_ATOMIC_FETCH_OR_2:
+    case BUILT_IN_ATOMIC_FETCH_OR_4:
+    case BUILT_IN_ATOMIC_FETCH_OR_8:
+    case BUILT_IN_ATOMIC_FETCH_OR_16:
+      gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_OR, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_STORE_1:
+    case BUILT_IN_ATOMIC_STORE_2:
+    case BUILT_IN_ATOMIC_STORE_4:
+    case BUILT_IN_ATOMIC_STORE_8:
+    case BUILT_IN_ATOMIC_STORE_16:
+      /* Since there canot be any LHS, the first parameter is meaningless.  */
+      gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_ST, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_ADD_FETCH_1:
+    case BUILT_IN_ATOMIC_ADD_FETCH_2:
+    case BUILT_IN_ATOMIC_ADD_FETCH_4:
+    case BUILT_IN_ATOMIC_ADD_FETCH_8:
+    case BUILT_IN_ATOMIC_ADD_FETCH_16:
+      gen_hsa_ternary_atomic_for_builtin (false, BRIG_ATOMIC_ADD, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_SUB_FETCH_1:
+    case BUILT_IN_ATOMIC_SUB_FETCH_2:
+    case BUILT_IN_ATOMIC_SUB_FETCH_4:
+    case BUILT_IN_ATOMIC_SUB_FETCH_8:
+    case BUILT_IN_ATOMIC_SUB_FETCH_16:
+      gen_hsa_ternary_atomic_for_builtin (false, BRIG_ATOMIC_SUB, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_AND_FETCH_1:
+    case BUILT_IN_ATOMIC_AND_FETCH_2:
+    case BUILT_IN_ATOMIC_AND_FETCH_4:
+    case BUILT_IN_ATOMIC_AND_FETCH_8:
+    case BUILT_IN_ATOMIC_AND_FETCH_16:
+      gen_hsa_ternary_atomic_for_builtin (false, BRIG_ATOMIC_AND, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_XOR_FETCH_1:
+    case BUILT_IN_ATOMIC_XOR_FETCH_2:
+    case BUILT_IN_ATOMIC_XOR_FETCH_4:
+    case BUILT_IN_ATOMIC_XOR_FETCH_8:
+    case BUILT_IN_ATOMIC_XOR_FETCH_16:
+      gen_hsa_ternary_atomic_for_builtin (false, BRIG_ATOMIC_XOR, stmt, hbb,
+					  ssa_map);
+      break;
+
+    case BUILT_IN_ATOMIC_OR_FETCH_1:
+    case BUILT_IN_ATOMIC_OR_FETCH_2:
+    case BUILT_IN_ATOMIC_OR_FETCH_4:
+    case BUILT_IN_ATOMIC_OR_FETCH_8:
+    case BUILT_IN_ATOMIC_OR_FETCH_16:
+      gen_hsa_ternary_atomic_for_builtin (false, BRIG_ATOMIC_OR, stmt, hbb,
+					  ssa_map);
+      break;
+
     case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_1:
     case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_2:
     case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_4: