@@ -785,118 +785,170 @@ enqueue_op (hsa_op_base *op)
return ret;
}
-/* Emit an immediate BRIG operand IMM. */
+/* Return the length in bytes of the BRIG type TYPE that is going to be
+ streamed out as an immediate constant (so it must not be B1). */
-static void
-emit_immediate_operand (hsa_op_immed *imm)
+static unsigned
+hsa_get_imm_brig_type_len (BrigType16_t type)
{
- struct BrigOperandConstantBytes out;
- uint32_t byteCount;
+ BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
+ BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
- union
- {
- uint8_t b8;
- uint16_t b16;
- uint32_t b32;
- uint64_t b64;
- } bytes;
- unsigned len;
+ switch (pack_type)
+ {
+ case BRIG_TYPE_PACK_NONE:
+ break;
+ case BRIG_TYPE_PACK_32:
+ return 4;
+ case BRIG_TYPE_PACK_64:
+ return 8;
+ case BRIG_TYPE_PACK_128:
+ return 16;
+ default:
+ gcc_unreachable ();
+ }
- memset (&out, 0, sizeof (out));
- switch (imm->type)
+ switch (base_type)
{
case BRIG_TYPE_U8:
case BRIG_TYPE_S8:
- len = 1;
- bytes.b8 = (uint8_t) TREE_INT_CST_LOW (imm->value);
- break;
+ case BRIG_TYPE_B8:
+ return 1;
case BRIG_TYPE_U16:
case BRIG_TYPE_S16:
- bytes.b16 = (uint16_t) TREE_INT_CST_LOW (imm->value);
- len = 2;
- break;
-
case BRIG_TYPE_F16:
- sorry ("Support for HSA does not implement immediate 16 bit FPU "
- "operands");
- len = 2;
- break;
-
+ case BRIG_TYPE_B16:
+ return 2;
case BRIG_TYPE_U32:
case BRIG_TYPE_S32:
- bytes.b32 = (uint32_t) TREE_INT_CST_LOW (imm->value);
- len = 4;
- break;
-
+ case BRIG_TYPE_F32:
+ case BRIG_TYPE_B32:
+ return 4;
case BRIG_TYPE_U64:
case BRIG_TYPE_S64:
- bytes.b64 = (uint64_t) int_cst_value (imm->value);
- len = 8;
- break;
-
- case BRIG_TYPE_F32:
case BRIG_TYPE_F64:
- {
- tree expr = imm->value;
- tree type = TREE_TYPE (expr);
+ case BRIG_TYPE_B64:
+ return 8;
+ case BRIG_TYPE_B128:
+ return 16;
+ default:
+ gcc_unreachable ();
+ }
+}
- len = GET_MODE_SIZE (TYPE_MODE (type));
+/* Emit one scalar VALUE to the data BRIG section. If NEED_LEN is not equal to
+ zero, shrink or extend the value to NEED_LEN bytes. Return how many bytes
+ were written. */
- /* There are always 32 bits in each long, no matter the size of
- the hosts long. */
- long tmp[6];
+static int
+emit_immediate_scalar_to_data_section (tree value, unsigned need_len)
+{
+ union
+ {
+ uint8_t b8;
+ uint16_t b16;
+ uint32_t b32;
+ uint64_t b64;
+ } bytes;
- gcc_assert (len == 4 || len == 8);
+ memset (&bytes, 0, sizeof (bytes));
+ tree type = TREE_TYPE (value);
+ gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
+ unsigned data_len = tree_to_uhwi (TYPE_SIZE (type))/BITS_PER_UNIT;
+ if (INTEGRAL_TYPE_P (type))
+ switch (data_len)
+ {
+ case 1:
+ bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
+ break;
+ case 2:
+ bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
+ break;
+ case 4:
+ bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
+ break;
+ case 8:
+ bytes.b64 = (uint64_t) int_cst_value (value);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ else if (SCALAR_FLOAT_TYPE_P (type))
+ {
+ if (data_len == 2)
+ {
+ sorry ("Support for HSA does not implement immediate 16 bit FPU "
+ "operands");
+ return 2;
+ }
+ unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type));
+ /* There are always 32 bits in each long, no matter the size of
+ the hosts long. */
+ long tmp[6];
- real_to_target (tmp, TREE_REAL_CST_PTR (expr), TYPE_MODE (type));
+ real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
- if (len == 4)
- bytes.b32 = (uint32_t) tmp[0];
- else
- {
- bytes.b64 = (uint64_t)(uint32_t) tmp[1];
- bytes.b64 <<= 32;
- bytes.b64 |= (uint32_t) tmp[0];
- }
+ if (int_len == 4)
+ bytes.b32 = (uint32_t) tmp[0];
+ else
+ {
+ bytes.b64 = (uint64_t)(uint32_t) tmp[1];
+ bytes.b64 <<= 32;
+ bytes.b64 |= (uint32_t) tmp[0];
+ }
+ }
+ else
+ gcc_unreachable ();
- break;
- }
+ int len;
+ if (need_len == 0)
+ len = data_len;
+ else
+ len = need_len;
- case BRIG_TYPE_U8X4:
- case BRIG_TYPE_S8X4:
- case BRIG_TYPE_U16X2:
- case BRIG_TYPE_S16X2:
- case BRIG_TYPE_F16X2:
- len = 4;
- sorry ("Support for HSA does not implement immediate 32bit "
- "vector operands. ");
- break;
+ brig_data.add (&bytes, len);
+ return len;
+}
- case BRIG_TYPE_U8X8:
- case BRIG_TYPE_S8X8:
- case BRIG_TYPE_U16X4:
- case BRIG_TYPE_S16X4:
- case BRIG_TYPE_F16X4:
- case BRIG_TYPE_U32X2:
- case BRIG_TYPE_S32X2:
- case BRIG_TYPE_F32X2:
- len = 8;
- sorry ("Support for HSA does not implement immediate 32bit "
- "vector operands. ");
- break;
+/* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
+ have been massaged to comply with various HSA/BRIG type requirements, so the
+ only important aspect of that type is its length (because HSAIL might expect
+ smaller constants or the type might have become bit-data). The data should
+ be represented according to what is in the tree representation. */
- default:
- gcc_unreachable ();
- }
+static void
+emit_immediate_operand (hsa_op_immed *imm)
+{
+ struct BrigOperandConstantBytes out;
+ unsigned total_len = hsa_get_imm_brig_type_len (imm->type);
+
+ /* We do not produce HSAIL array types anywhere. */
+ gcc_assert (!(imm->type & BRIG_TYPE_ARRAY));
+ memset (&out, 0, sizeof (out));
out.base.byteCount = htole16 (sizeof (out));
out.base.kind = htole16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
- byteCount = htole32 (len);
+ uint32_t byteCount = htole32 (total_len);
out.type = htole16 (imm->type);
- out.bytes = brig_data.add (&byteCount, sizeof (byteCount));
- brig_data.add (&bytes, len);
-
+ out.bytes = htole32 (brig_data.add (&byteCount, sizeof (byteCount)));
brig_operand.add (&out, sizeof(out));
+
+ if (TREE_CODE (imm->value) == VECTOR_CST)
+ {
+ int i, num = VECTOR_CST_NELTS (imm->value);
+ for (i = 0; i < num; i++)
+ {
+ unsigned actual;
+ actual = emit_immediate_scalar_to_data_section
+ (VECTOR_CST_ELT (imm->value, i), 0);
+ total_len -= actual;
+ }
+ /* Vectors should have the exact size. */
+ gcc_assert (total_len == 0);
+ }
+ else
+ emit_immediate_scalar_to_data_section (imm->value, total_len);
+
brig_data.round_size_up (4);
}
@@ -1318,7 +1318,8 @@ gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb,
addr = gen_hsa_addr (rhs, hbb, ssa_map);
mem->opcode = BRIG_OPCODE_LD;
/* Not dest->type, that's possibly extended. */
- mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (type, false));
+ mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (type,
+ false));
mem->operands[0] = dest;
mem->operands[1] = addr;
set_reg_def (dest, mem);
@@ -1331,6 +1332,9 @@ gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb,
rhs);
}
+/* Generate HSAIL instructions storing into memory. LHS is the destination of
+ the store, SRC is the source operand. Add instructions to HBB, use SSA_MAP
+ for HSA SSA lookup. */
static void
gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb,
@@ -1343,7 +1347,8 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb,
mem->opcode = BRIG_OPCODE_ST;
if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (src))
reg->uses.safe_push (mem);
- mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs), false));
+ mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
+ false));
/* XXX The HSAIL disasm has another constraint: if the source
is an immediate then it must match the destination type. If
@@ -1351,7 +1356,32 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb,
We're always allocating new operands so we can modify the above
in place. */
if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (src))
- imm->type = mem->type;
+ {
+ if ((imm->type & BRIG_TYPE_PACK_MASK) == BRIG_TYPE_PACK_NONE)
+ imm->type = mem->type;
+ else
+ {
+ /* ...and all vector immediates apparently need to be vectors of
+ unsigned bytes. */
+ BrigType16_t bt = bittype_for_type (imm->type);
+ gcc_assert (bt == bittype_for_type (mem->type));
+ switch (bt)
+ {
+ case BRIG_TYPE_B32:
+ imm->type = BRIG_TYPE_U8X4;
+ break;
+ case BRIG_TYPE_B64:
+ imm->type = BRIG_TYPE_U8X8;
+ break;
+ case BRIG_TYPE_B128:
+ imm->type = BRIG_TYPE_U8X16;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+
mem->operands[0] = src;
mem->operands[1] = addr;
if (addr->reg)