diff mbox

[hsa] Support for vector immediates

Message ID 20150617125317.GE18873@virgil.suse
State New
Headers show

Commit Message

Martin Jambor June 17, 2015, 12:53 p.m. UTC
Hi,

the patch below adds support for HSA vector immediates and
instructions storing them directly to memory, which was hitherto
missing on the branch.

Committed as r224554.

Thanks,

Martin


2015-06-16  Martin Jambor  <mjambor@suse.cz>

	* hsa-brig.c (hsa_get_imm_brig_type_len): New function.
	(emit_immediate_scalar_to_data_section): Likewise.
	(emit_immediate_operand): Reimplemented.
	* hsa-gen.c (gen_hsa_insns_for_load): Trimmed long line.
	(gen_hsa_insns_for_store): Added missing comment, trimmed long line,
	added another type exception for vector immediates.
diff mbox

Patch

diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index d28634d..bb4a2c1 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -785,118 +785,170 @@  enqueue_op (hsa_op_base *op)
   return ret;
 }
 
-/* Emit an immediate BRIG operand IMM.  */
+/* Return the length in bytes of the BRIG type TYPE that is going to be
+   streamed out as an immediate constant (so it must not be B1).  */
 
-static void
-emit_immediate_operand (hsa_op_immed *imm)
+static unsigned
+hsa_get_imm_brig_type_len (BrigType16_t type)
 {
-  struct BrigOperandConstantBytes out;
-  uint32_t byteCount;
+  BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
+  BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
 
-  union
-  {
-    uint8_t b8;
-    uint16_t b16;
-    uint32_t b32;
-    uint64_t b64;
-  } bytes;
-  unsigned len;
+  switch (pack_type)
+    {
+    case BRIG_TYPE_PACK_NONE:
+      break;
+    case BRIG_TYPE_PACK_32:
+      return 4;
+    case BRIG_TYPE_PACK_64:
+      return 8;
+    case BRIG_TYPE_PACK_128:
+      return 16;
+    default:
+      gcc_unreachable ();
+    }
 
-  memset (&out, 0, sizeof (out));
-  switch (imm->type)
+  switch (base_type)
     {
     case BRIG_TYPE_U8:
     case BRIG_TYPE_S8:
-      len = 1;
-      bytes.b8 = (uint8_t) TREE_INT_CST_LOW (imm->value);
-      break;
+    case BRIG_TYPE_B8:
+      return 1;
     case BRIG_TYPE_U16:
     case BRIG_TYPE_S16:
-      bytes.b16 = (uint16_t) TREE_INT_CST_LOW (imm->value);
-      len = 2;
-      break;
-
     case BRIG_TYPE_F16:
-      sorry ("Support for HSA does not implement immediate 16 bit FPU "
-	     "operands");
-      len = 2;
-      break;
-
+    case BRIG_TYPE_B16:
+      return 2;
     case BRIG_TYPE_U32:
     case BRIG_TYPE_S32:
-      bytes.b32 = (uint32_t) TREE_INT_CST_LOW (imm->value);
-      len = 4;
-      break;
-
+    case BRIG_TYPE_F32:
+    case BRIG_TYPE_B32:
+      return 4;
     case BRIG_TYPE_U64:
     case BRIG_TYPE_S64:
-      bytes.b64 = (uint64_t) int_cst_value (imm->value);
-      len = 8;
-      break;
-
-    case BRIG_TYPE_F32:
     case BRIG_TYPE_F64:
-      {
-	tree expr = imm->value;
-	tree type = TREE_TYPE (expr);
+    case BRIG_TYPE_B64:
+      return 8;
+    case BRIG_TYPE_B128:
+      return 16;
+    default:
+      gcc_unreachable ();
+    }
+}
 
-	len = GET_MODE_SIZE (TYPE_MODE (type));
+/* Emit one scalar VALUE to the data BRIG section.  If NEED_LEN is not equal to
+   zero, shrink or extend the value to NEED_LEN bytes.  Return how many bytes
+   were written.  */
 
-	/* There are always 32 bits in each long, no matter the size of
-	   the hosts long.  */
-	long tmp[6];
+static int
+emit_immediate_scalar_to_data_section (tree value, unsigned need_len)
+{
+  union
+  {
+    uint8_t b8;
+    uint16_t b16;
+    uint32_t b32;
+    uint64_t b64;
+  } bytes;
 
-	gcc_assert (len == 4 || len == 8);
+  memset (&bytes, 0, sizeof (bytes));
+  tree type = TREE_TYPE (value);
+  gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
+  unsigned data_len = tree_to_uhwi (TYPE_SIZE (type))/BITS_PER_UNIT;
+  if (INTEGRAL_TYPE_P (type))
+    switch (data_len)
+      {
+      case 1:
+	bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
+	break;
+      case 2:
+	bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
+	break;
+      case 4:
+	bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
+	break;
+      case 8:
+	bytes.b64 = (uint64_t) int_cst_value (value);
+	break;
+      default:
+	gcc_unreachable ();
+      }
+  else if (SCALAR_FLOAT_TYPE_P (type))
+    {
+      if (data_len == 2)
+	{
+	  sorry ("Support for HSA does not implement immediate 16 bit FPU "
+		 "operands");
+	  return 2;
+	}
+      unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type));
+      /* There are always 32 bits in each long, no matter the size of
+	 the host's long.  */
+      long tmp[6];
 
-	real_to_target (tmp, TREE_REAL_CST_PTR (expr), TYPE_MODE (type));
+      real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
 
-	if (len == 4)
-	  bytes.b32 = (uint32_t) tmp[0];
-	else
-	  {
-	    bytes.b64 = (uint64_t)(uint32_t) tmp[1];
-	    bytes.b64 <<= 32;
-	    bytes.b64 |= (uint32_t) tmp[0];
-	  }
+      if (int_len == 4)
+	bytes.b32 = (uint32_t) tmp[0];
+      else
+	{
+	  bytes.b64 = (uint64_t)(uint32_t) tmp[1];
+	  bytes.b64 <<= 32;
+	  bytes.b64 |= (uint32_t) tmp[0];
+	}
+    }
+  else
+    gcc_unreachable ();
 
-	break;
-      }
+  int len;
+  if (need_len == 0)
+    len = data_len;
+  else
+    len = need_len;
 
-    case BRIG_TYPE_U8X4:
-    case BRIG_TYPE_S8X4:
-    case BRIG_TYPE_U16X2:
-    case BRIG_TYPE_S16X2:
-    case BRIG_TYPE_F16X2:
-      len = 4;
-      sorry ("Support for HSA does not implement immediate 32bit "
-	     "vector operands. ");
-      break;
+  brig_data.add (&bytes, len);
+  return len;
+}
 
-    case BRIG_TYPE_U8X8:
-    case BRIG_TYPE_S8X8:
-    case BRIG_TYPE_U16X4:
-    case BRIG_TYPE_S16X4:
-    case BRIG_TYPE_F16X4:
-    case BRIG_TYPE_U32X2:
-    case BRIG_TYPE_S32X2:
-    case BRIG_TYPE_F32X2:
-      len = 8;
-      sorry ("Support for HSA does not implement immediate 32bit "
-	     "vector operands. ");
-      break;
+/* Emit an immediate BRIG operand IMM.  The BRIG type of the immediate might
+   have been massaged to comply with various HSA/BRIG type requirements, so the
+   only important aspect of that type is its length (because HSAIL might expect
+   a smaller constant or a bit-data type).  The data should be represented
+   according to what is in the tree representation.  */
 
-    default:
-      gcc_unreachable ();
-    }
+static void
+emit_immediate_operand (hsa_op_immed *imm)
+{
+  struct BrigOperandConstantBytes out;
+  unsigned total_len = hsa_get_imm_brig_type_len (imm->type);
+
+  /* We do not produce HSAIL array types anywhere.  */
+  gcc_assert (!(imm->type & BRIG_TYPE_ARRAY));
 
+  memset (&out, 0, sizeof (out));
   out.base.byteCount = htole16 (sizeof (out));
   out.base.kind = htole16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
-  byteCount = htole32 (len);
+  uint32_t byteCount = htole32 (total_len);
   out.type = htole16 (imm->type);
-  out.bytes = brig_data.add (&byteCount, sizeof (byteCount));
-  brig_data.add (&bytes, len);
-
+  out.bytes = htole32 (brig_data.add (&byteCount, sizeof (byteCount)));
   brig_operand.add (&out, sizeof(out));
+
+  if (TREE_CODE (imm->value) == VECTOR_CST)
+    {
+      int i, num = VECTOR_CST_NELTS (imm->value);
+      for (i = 0; i < num; i++)
+	{
+	  unsigned actual;
+	  actual = emit_immediate_scalar_to_data_section
+	    (VECTOR_CST_ELT (imm->value, i), 0);
+	  total_len -= actual;
+	}
+      /* Vectors should have the exact size.  */
+      gcc_assert (total_len == 0);
+    }
+  else
+    emit_immediate_scalar_to_data_section (imm->value, total_len);
+
   brig_data.round_size_up (4);
 }
 
diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index db5200d..0349efd 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -1318,7 +1318,8 @@  gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb,
       addr = gen_hsa_addr (rhs, hbb, ssa_map);
       mem->opcode = BRIG_OPCODE_LD;
       /* Not dest->type, that's possibly extended.  */
-      mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (type, false));
+      mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (type,
+								    false));
       mem->operands[0] = dest;
       mem->operands[1] = addr;
       set_reg_def (dest, mem);
@@ -1331,6 +1332,9 @@  gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb,
 	   rhs);
 }
 
+/* Generate HSAIL instructions storing into memory.  LHS is the destination of
+   the store, SRC is the source operand.  Add instructions to HBB, use SSA_MAP
+   for HSA SSA lookup.  */
 
 static void
 gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb,
@@ -1343,7 +1347,8 @@  gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb,
   mem->opcode = BRIG_OPCODE_ST;
   if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (src))
     reg->uses.safe_push (mem);
-  mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs), false));
+  mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
+								false));
 
   /* XXX The HSAIL disasm has another constraint: if the source
      is an immediate then it must match the destination type.  If
@@ -1351,7 +1356,32 @@  gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb,
      We're always allocating new operands so we can modify the above
      in place.  */
   if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (src))
-    imm->type = mem->type;
+    {
+      if ((imm->type & BRIG_TYPE_PACK_MASK) == BRIG_TYPE_PACK_NONE)
+	imm->type = mem->type;
+      else
+	{
+	  /* ...and all vector immediates apparently need to be vectors of
+	     unsigned bytes. */
+	  BrigType16_t bt = bittype_for_type (imm->type);
+	  gcc_assert (bt == bittype_for_type (mem->type));
+	  switch (bt)
+	    {
+	    case BRIG_TYPE_B32:
+	      imm->type = BRIG_TYPE_U8X4;
+	      break;
+	    case BRIG_TYPE_B64:
+	      imm->type = BRIG_TYPE_U8X8;
+	      break;
+	    case BRIG_TYPE_B128:
+	      imm->type = BRIG_TYPE_U8X16;
+	      break;
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    }
+
   mem->operands[0] = src;
   mem->operands[1] = addr;
   if (addr->reg)