@@ -23,962 +23,1236 @@
#include "config.h"
#include "system.h"
-typedef uint32_t BrigDataOffset32_t;
+typedef uint32_t BrigVersion32_t; /* 32-bit unsigned storage type; presumably holds enum BrigVersion values (TODO confirm against users). */
+
+enum BrigVersion {
+ /* Major/minor version numbers for HSAIL and for BRIG; both pairs are 1.0 here. */
+ BRIG_VERSION_HSAIL_MAJOR = 1,
+ BRIG_VERSION_HSAIL_MINOR = 0,
+ BRIG_VERSION_BRIG_MAJOR = 1,
+ BRIG_VERSION_BRIG_MINOR = 0
+};
+/* Fixed-width integer aliases; the numeric suffix of each alias (8/16/32) matches the bit width of its underlying unsigned type. */
+typedef uint8_t BrigAlignment8_t;
+
+typedef uint8_t BrigAllocation8_t;
+
+typedef uint8_t BrigAluModifier8_t;
+
+typedef uint8_t BrigAtomicOperation8_t;
+
 typedef uint32_t BrigCodeOffset32_t;
-typedef uint32_t BrigOperandOffset32_t;
-typedef BrigDataOffset32_t BrigDataOffsetString32_t;
+typedef uint8_t BrigCompareOperation8_t;
+
+typedef uint16_t BrigControlDirective16_t;
+
+typedef uint32_t BrigDataOffset32_t;
+/* The three BrigDataOffset*32_t aliases below share BrigDataOffset32_t's 32-bit representation. */
 typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t;
+
 typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t;
-typedef uint8_t BrigAlignment8_t;
+typedef BrigDataOffset32_t BrigDataOffsetString32_t;
+
+typedef uint8_t BrigExecutableModifier8_t;
+
+typedef uint8_t BrigImageChannelOrder8_t;
+
+typedef uint8_t BrigImageChannelType8_t;
+
+typedef uint8_t BrigImageGeometry8_t;
+
+typedef uint8_t BrigImageQuery8_t;
+
+typedef uint16_t BrigKind16_t;
+
+typedef uint8_t BrigLinkage8_t;
+
+typedef uint8_t BrigMachineModel8_t;
+
+typedef uint8_t BrigMemoryModifier8_t;
+
+typedef uint8_t BrigMemoryOrder8_t;
+
+typedef uint8_t BrigMemoryScope8_t;
+
+typedef uint16_t BrigOpcode16_t;
+
+typedef uint32_t BrigOperandOffset32_t;
+
+typedef uint8_t BrigPack8_t;
+
+typedef uint8_t BrigProfile8_t;
+
+typedef uint16_t BrigRegisterKind16_t;
+
+typedef uint8_t BrigRound8_t;
+
+typedef uint8_t BrigSamplerAddressing8_t;
+
+typedef uint8_t BrigSamplerCoordNormalization8_t;
+
+typedef uint8_t BrigSamplerFilter8_t;
+
+typedef uint8_t BrigSamplerQuery8_t;
+
+typedef uint32_t BrigSectionIndex32_t;
+
+typedef uint8_t BrigSegCvtModifier8_t;
+
+typedef uint8_t BrigSegment8_t;
+
+typedef uint32_t BrigStringOffset32_t;
+
+typedef uint16_t BrigType16_t;
+
+typedef uint8_t BrigVariableModifier8_t;
+
+typedef uint8_t BrigWidth8_t;
+
+typedef uint32_t BrigExceptions32_t;
+
+enum BrigKind {
+ /* Kind codes are grouped by range: 0x1000 directives, 0x2000 instructions, 0x3000 operands. */
+ BRIG_KIND_NONE = 0x0000,
+ /* Each *_BEGIN aliases the first code of its group; each *_END is one past the group's last code. */
+ BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
+ BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000,
+ BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001,
+ BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
+ BRIG_KIND_DIRECTIVE_CONTROL = 0x1003,
+ BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004,
+ BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
+ BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006,
+ BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007,
+ BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
+ BRIG_KIND_DIRECTIVE_LABEL = 0x1009,
+ BRIG_KIND_DIRECTIVE_LOC = 0x100a,
+ BRIG_KIND_DIRECTIVE_MODULE = 0x100b,
+ BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c,
+ BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d,
+ BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e,
+ BRIG_KIND_DIRECTIVE_END = 0x100f,
+ /* Instruction kinds (0x2000..0x2011). */
+ BRIG_KIND_INST_BEGIN = 0x2000,
+ BRIG_KIND_INST_ADDR = 0x2000,
+ BRIG_KIND_INST_ATOMIC = 0x2001,
+ BRIG_KIND_INST_BASIC = 0x2002,
+ BRIG_KIND_INST_BR = 0x2003,
+ BRIG_KIND_INST_CMP = 0x2004,
+ BRIG_KIND_INST_CVT = 0x2005,
+ BRIG_KIND_INST_IMAGE = 0x2006,
+ BRIG_KIND_INST_LANE = 0x2007,
+ BRIG_KIND_INST_MEM = 0x2008,
+ BRIG_KIND_INST_MEM_FENCE = 0x2009,
+ BRIG_KIND_INST_MOD = 0x200a,
+ BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
+ BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
+ BRIG_KIND_INST_QUEUE = 0x200d,
+ BRIG_KIND_INST_SEG = 0x200e,
+ BRIG_KIND_INST_SEG_CVT = 0x200f,
+ BRIG_KIND_INST_SIGNAL = 0x2010,
+ BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
+ BRIG_KIND_INST_END = 0x2012,
+ /* Operand kinds (0x3000..0x300c). */
+ BRIG_KIND_OPERAND_BEGIN = 0x3000,
+ BRIG_KIND_OPERAND_ADDRESS = 0x3000,
+ BRIG_KIND_OPERAND_ALIGN = 0x3001,
+ BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
+ BRIG_KIND_OPERAND_CODE_REF = 0x3003,
+ BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004,
+ BRIG_KIND_OPERAND_RESERVED = 0x3005,
+ BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006,
+ BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007,
+ BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008,
+ BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009,
+ BRIG_KIND_OPERAND_REGISTER = 0x300a,
+ BRIG_KIND_OPERAND_STRING = 0x300b,
+ BRIG_KIND_OPERAND_WAVESIZE = 0x300c,
+ BRIG_KIND_OPERAND_END = 0x300d
+};
+
 enum BrigAlignment {
- BRIG_ALIGNMENT_NONE = 0,
- BRIG_ALIGNMENT_1 = 1,
- BRIG_ALIGNMENT_2 = 2,
- BRIG_ALIGNMENT_4 = 3,
- BRIG_ALIGNMENT_8 = 4,
- BRIG_ALIGNMENT_16 = 5,
- BRIG_ALIGNMENT_32 = 6,
- BRIG_ALIGNMENT_64 = 7,
- BRIG_ALIGNMENT_128 = 8,
- BRIG_ALIGNMENT_256 = 9
+ /* Code N (1..9) is named after 2^(N-1), so the code is log2 of the name's number plus one; 0 is NONE. */
+ BRIG_ALIGNMENT_NONE = 0,
+ BRIG_ALIGNMENT_1 = 1,
+ BRIG_ALIGNMENT_2 = 2,
+ BRIG_ALIGNMENT_4 = 3,
+ BRIG_ALIGNMENT_8 = 4,
+ BRIG_ALIGNMENT_16 = 5,
+ BRIG_ALIGNMENT_32 = 6,
+ BRIG_ALIGNMENT_64 = 7,
+ BRIG_ALIGNMENT_128 = 8,
+ BRIG_ALIGNMENT_256 = 9,
+ /* LAST has no initializer, so it is 10 (one past _256); MAX is therefore 9. */
+ BRIG_ALIGNMENT_LAST,
+ BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_LAST - 1
 };
-typedef uint8_t BrigAllocation8_t;
 enum BrigAllocation {
- BRIG_ALLOCATION_NONE = 0,
- BRIG_ALLOCATION_PROGRAM = 1,
- BRIG_ALLOCATION_AGENT = 2,
- BRIG_ALLOCATION_AUTOMATIC = 3
+ /* Allocation codes, consecutive 0..3; 0 is NONE. */
+ BRIG_ALLOCATION_NONE = 0,
+ BRIG_ALLOCATION_PROGRAM = 1,
+ BRIG_ALLOCATION_AGENT = 2,
+ BRIG_ALLOCATION_AUTOMATIC = 3
 };
-typedef uint16_t BrigAluModifier16_t;
 enum BrigAluModifierMask {
- BRIG_ALU_ROUND = 15,
- BRIG_ALU_FTZ = 16
+ BRIG_ALU_FTZ = 1 /* The only mask in this enum; selects bit 0. */
 };
-typedef uint8_t BrigAtomicOperation8_t;
 enum BrigAtomicOperation {
- BRIG_ATOMIC_ADD = 0,
- BRIG_ATOMIC_AND = 1,
- BRIG_ATOMIC_CAS = 2,
- BRIG_ATOMIC_EXCH = 3,
- BRIG_ATOMIC_LD = 4,
- BRIG_ATOMIC_MAX = 5,
- BRIG_ATOMIC_MIN = 6,
- BRIG_ATOMIC_OR = 7,
- BRIG_ATOMIC_ST = 8,
- BRIG_ATOMIC_SUB = 9,
- BRIG_ATOMIC_WRAPDEC = 10,
- BRIG_ATOMIC_WRAPINC = 11,
- BRIG_ATOMIC_XOR = 12,
- BRIG_ATOMIC_WAIT_EQ = 13,
- BRIG_ATOMIC_WAIT_NE = 14,
- BRIG_ATOMIC_WAIT_LT = 15,
- BRIG_ATOMIC_WAIT_GTE = 16,
- BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
- BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
- BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
- BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
-};
-
-typedef uint16_t BrigKinds16_t;
-enum BrigKind {
- BRIG_KIND_NONE = 0x0000,
- BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
- BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000,
- BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001,
- BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
- BRIG_KIND_DIRECTIVE_CONTROL = 0x1003,
- BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004,
- BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
- BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006,
- BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007,
- BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
- BRIG_KIND_DIRECTIVE_LABEL = 0x1009,
- BRIG_KIND_DIRECTIVE_LOC = 0x100a,
- BRIG_KIND_DIRECTIVE_PRAGMA = 0x100b,
- BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100c,
- BRIG_KIND_DIRECTIVE_VARIABLE = 0x100d,
- BRIG_KIND_DIRECTIVE_VERSION = 0x100e,
- BRIG_KIND_DIRECTIVE_END = 0x100f,
- BRIG_KIND_INST_BEGIN = 0x2000,
- BRIG_KIND_INST_ADDR = 0x2000,
- BRIG_KIND_INST_ATOMIC = 0x2001,
- BRIG_KIND_INST_BASIC = 0x2002,
- BRIG_KIND_INST_BR = 0x2003,
- BRIG_KIND_INST_CMP = 0x2004,
- BRIG_KIND_INST_CVT = 0x2005,
- BRIG_KIND_INST_IMAGE = 0x2006,
- BRIG_KIND_INST_LANE = 0x2007,
- BRIG_KIND_INST_MEM = 0x2008,
- BRIG_KIND_INST_MEM_FENCE = 0x2009,
- BRIG_KIND_INST_MOD = 0x200a,
- BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
- BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
- BRIG_KIND_INST_QUEUE = 0x200d,
- BRIG_KIND_INST_SEG = 0x200e,
- BRIG_KIND_INST_SEG_CVT = 0x200f,
- BRIG_KIND_INST_SIGNAL = 0x2010,
- BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
- BRIG_KIND_INST_END = 0x2012,
- BRIG_KIND_OPERAND_BEGIN = 0x3000,
- BRIG_KIND_OPERAND_ADDRESS = 0x3000,
- BRIG_KIND_OPERAND_DATA = 0x3001,
- BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
- BRIG_KIND_OPERAND_CODE_REF = 0x3003,
- BRIG_KIND_OPERAND_IMAGE_PROPERTIES = 0x3004,
- BRIG_KIND_OPERAND_OPERAND_LIST = 0x3005,
- BRIG_KIND_OPERAND_REG = 0x3006,
- BRIG_KIND_OPERAND_SAMPLER_PROPERTIES = 0x3007,
- BRIG_KIND_OPERAND_STRING = 0x3008,
- BRIG_KIND_OPERAND_WAVESIZE = 0x3009,
- BRIG_KIND_OPERAND_END = 0x300a
+ /* Atomic operation codes, consecutive 0..20; 13..16 are the WAIT_* forms and 17..20 the WAITTIMEOUT_* forms. */
+ BRIG_ATOMIC_ADD = 0,
+ BRIG_ATOMIC_AND = 1,
+ BRIG_ATOMIC_CAS = 2,
+ BRIG_ATOMIC_EXCH = 3,
+ BRIG_ATOMIC_LD = 4,
+ BRIG_ATOMIC_MAX = 5,
+ BRIG_ATOMIC_MIN = 6,
+ BRIG_ATOMIC_OR = 7,
+ BRIG_ATOMIC_ST = 8,
+ BRIG_ATOMIC_SUB = 9,
+ BRIG_ATOMIC_WRAPDEC = 10,
+ BRIG_ATOMIC_WRAPINC = 11,
+ BRIG_ATOMIC_XOR = 12,
+ BRIG_ATOMIC_WAIT_EQ = 13,
+ BRIG_ATOMIC_WAIT_NE = 14,
+ BRIG_ATOMIC_WAIT_LT = 15,
+ BRIG_ATOMIC_WAIT_GTE = 16,
+ BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
+ BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
+ BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
+ BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
 };
-typedef uint8_t BrigCompareOperation8_t;
 enum BrigCompareOperation {
- BRIG_COMPARE_EQ = 0,
- BRIG_COMPARE_NE = 1,
- BRIG_COMPARE_LT = 2,
- BRIG_COMPARE_LE = 3,
- BRIG_COMPARE_GT = 4,
- BRIG_COMPARE_GE = 5,
- BRIG_COMPARE_EQU = 6,
- BRIG_COMPARE_NEU = 7,
- BRIG_COMPARE_LTU = 8,
- BRIG_COMPARE_LEU = 9,
- BRIG_COMPARE_GTU = 10,
- BRIG_COMPARE_GEU = 11,
- BRIG_COMPARE_NUM = 12,
- BRIG_COMPARE_NAN = 13,
- BRIG_COMPARE_SEQ = 14,
- BRIG_COMPARE_SNE = 15,
- BRIG_COMPARE_SLT = 16,
- BRIG_COMPARE_SLE = 17,
- BRIG_COMPARE_SGT = 18,
- BRIG_COMPARE_SGE = 19,
- BRIG_COMPARE_SGEU = 20,
- BRIG_COMPARE_SEQU = 21,
- BRIG_COMPARE_SNEU = 22,
- BRIG_COMPARE_SLTU = 23,
- BRIG_COMPARE_SLEU = 24,
- BRIG_COMPARE_SNUM = 25,
- BRIG_COMPARE_SNAN = 26,
- BRIG_COMPARE_SGTU = 27
+ /* Comparison codes 0..27. NOTE: the S-prefixed codes (14..27) are not in the same order as 0..13 (SGEU = 20 precedes SEQU; SGTU = 27 is last), so an S code cannot be derived by adding a fixed offset. */
+ BRIG_COMPARE_EQ = 0,
+ BRIG_COMPARE_NE = 1,
+ BRIG_COMPARE_LT = 2,
+ BRIG_COMPARE_LE = 3,
+ BRIG_COMPARE_GT = 4,
+ BRIG_COMPARE_GE = 5,
+ BRIG_COMPARE_EQU = 6,
+ BRIG_COMPARE_NEU = 7,
+ BRIG_COMPARE_LTU = 8,
+ BRIG_COMPARE_LEU = 9,
+ BRIG_COMPARE_GTU = 10,
+ BRIG_COMPARE_GEU = 11,
+ BRIG_COMPARE_NUM = 12,
+ BRIG_COMPARE_NAN = 13,
+ BRIG_COMPARE_SEQ = 14,
+ BRIG_COMPARE_SNE = 15,
+ BRIG_COMPARE_SLT = 16,
+ BRIG_COMPARE_SLE = 17,
+ BRIG_COMPARE_SGT = 18,
+ BRIG_COMPARE_SGE = 19,
+ BRIG_COMPARE_SGEU = 20,
+ BRIG_COMPARE_SEQU = 21,
+ BRIG_COMPARE_SNEU = 22,
+ BRIG_COMPARE_SLTU = 23,
+ BRIG_COMPARE_SLEU = 24,
+ BRIG_COMPARE_SNUM = 25,
+ BRIG_COMPARE_SNAN = 26,
+ BRIG_COMPARE_SGTU = 27
 };
-typedef uint16_t BrigControlDirective16_t;
 enum BrigControlDirective {
- BRIG_CONTROL_NONE = 0,
- BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
- BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
- BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
- BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
- BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
- BRIG_CONTROL_REQUESTEDWORKGROUPSPERCU = 6,
- BRIG_CONTROL_REQUIREDDIM = 7,
- BRIG_CONTROL_REQUIREDGRIDSIZE = 8,
- BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 9,
- BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 10
+ /* Control directive codes, consecutive 0..9; 0 is NONE. */
+ BRIG_CONTROL_NONE = 0,
+ BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
+ BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
+ BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
+ BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
+ BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
+ BRIG_CONTROL_REQUIREDDIM = 6,
+ BRIG_CONTROL_REQUIREDGRIDSIZE = 7,
+ BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8,
+ BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9
 };
-typedef uint8_t BrigExecutableModifier8_t;
-enum BrigExecuteableModifierMask {
- BRIG_EXECUTABLE_DEFINITION = 1
+enum BrigExecutableModifierMask {
+ /* The only mask in this enum; selects bit 0. */
+ BRIG_EXECUTABLE_DEFINITION = 1
 };
-typedef uint8_t BrigImageChannelOrder8_t;
 enum BrigImageChannelOrder {
- BRIG_CHANNEL_ORDER_A = 0,
- BRIG_CHANNEL_ORDER_R = 1,
- BRIG_CHANNEL_ORDER_RX = 2,
- BRIG_CHANNEL_ORDER_RG = 3,
- BRIG_CHANNEL_ORDER_RGX = 4,
- BRIG_CHANNEL_ORDER_RA = 5,
- BRIG_CHANNEL_ORDER_RGB = 6,
- BRIG_CHANNEL_ORDER_RGBX = 7,
- BRIG_CHANNEL_ORDER_RGBA = 8,
- BRIG_CHANNEL_ORDER_BGRA = 9,
- BRIG_CHANNEL_ORDER_ARGB = 10,
- BRIG_CHANNEL_ORDER_ABGR = 11,
- BRIG_CHANNEL_ORDER_SRGB = 12,
- BRIG_CHANNEL_ORDER_SRGBX = 13,
- BRIG_CHANNEL_ORDER_SRGBA = 14,
- BRIG_CHANNEL_ORDER_SBGRA = 15,
- BRIG_CHANNEL_ORDER_INTENSITY = 16,
- BRIG_CHANNEL_ORDER_LUMINANCE = 17,
- BRIG_CHANNEL_ORDER_DEPTH = 18,
- BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19
+ /* Image channel order codes, consecutive 0..19. */
+ BRIG_CHANNEL_ORDER_A = 0,
+ BRIG_CHANNEL_ORDER_R = 1,
+ BRIG_CHANNEL_ORDER_RX = 2,
+ BRIG_CHANNEL_ORDER_RG = 3,
+ BRIG_CHANNEL_ORDER_RGX = 4,
+ BRIG_CHANNEL_ORDER_RA = 5,
+ BRIG_CHANNEL_ORDER_RGB = 6,
+ BRIG_CHANNEL_ORDER_RGBX = 7,
+ BRIG_CHANNEL_ORDER_RGBA = 8,
+ BRIG_CHANNEL_ORDER_BGRA = 9,
+ BRIG_CHANNEL_ORDER_ARGB = 10,
+ BRIG_CHANNEL_ORDER_ABGR = 11,
+ BRIG_CHANNEL_ORDER_SRGB = 12,
+ BRIG_CHANNEL_ORDER_SRGBX = 13,
+ BRIG_CHANNEL_ORDER_SRGBA = 14,
+ BRIG_CHANNEL_ORDER_SBGRA = 15,
+ BRIG_CHANNEL_ORDER_INTENSITY = 16,
+ BRIG_CHANNEL_ORDER_LUMINANCE = 17,
+ BRIG_CHANNEL_ORDER_DEPTH = 18,
+ BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19,
+ /* UNKNOWN has no initializer, so it is 20 and would shift if an entry were added above it. */
+ BRIG_CHANNEL_ORDER_UNKNOWN,
+ /* FIRST_USER_DEFINED is 128, the upper half of the 8-bit BrigImageChannelOrder8_t range. */
+ BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128
+
 };
-typedef uint8_t BrigImageChannelType8_t;
 enum BrigImageChannelType {
- BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
- BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
- BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
- BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
- BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
- BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
- BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
- BRIG_CHANNEL_TYPE_UNORM_SHORT_101010 = 7,
- BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
- BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
- BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
- BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
- BRIG_CHANNEL_TYPE_FLOAT = 15
+ /* Image channel type codes, consecutive 0..15. */
+ BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
+ BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
+ BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
+ BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
+ BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
+ BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
+ BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
+ BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7,
+ BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
+ BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
+ BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
+ BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
+ BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
+ BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
+ BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
+ BRIG_CHANNEL_TYPE_FLOAT = 15,
+ /* UNKNOWN has no initializer, so it is 16 and would shift if an entry were added above it. */
+ BRIG_CHANNEL_TYPE_UNKNOWN,
+ /* FIRST_USER_DEFINED is 128, the upper half of the 8-bit BrigImageChannelType8_t range. */
+ BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128
 };
-typedef uint8_t BrigImageGeometry8_t;
 enum BrigImageGeometry {
- BRIG_GEOMETRY_1D = 0,
- BRIG_GEOMETRY_2D = 1,
- BRIG_GEOMETRY_3D = 2,
- BRIG_GEOMETRY_1DA = 3,
- BRIG_GEOMETRY_2DA = 4,
- BRIG_GEOMETRY_1DB = 5,
- BRIG_GEOMETRY_2DDEPTH = 6,
- BRIG_GEOMETRY_2DADEPTH = 7
+ /* Image geometry codes, consecutive 0..7. */
+ BRIG_GEOMETRY_1D = 0,
+ BRIG_GEOMETRY_2D = 1,
+ BRIG_GEOMETRY_3D = 2,
+ BRIG_GEOMETRY_1DA = 3,
+ BRIG_GEOMETRY_2DA = 4,
+ BRIG_GEOMETRY_1DB = 5,
+ BRIG_GEOMETRY_2DDEPTH = 6,
+ BRIG_GEOMETRY_2DADEPTH = 7,
+ /* UNKNOWN has no initializer, so it is 8 and would shift if an entry were added above it. */
+ BRIG_GEOMETRY_UNKNOWN,
+ /* FIRST_USER_DEFINED is 128, the upper half of the 8-bit BrigImageGeometry8_t range. */
+ BRIG_GEOMETRY_FIRST_USER_DEFINED = 128
 };
-typedef uint8_t BrigImageQuery8_t;
 enum BrigImageQuery {
- BRIG_IMAGE_QUERY_WIDTH = 0,
- BRIG_IMAGE_QUERY_HEIGHT = 1,
- BRIG_IMAGE_QUERY_DEPTH = 2,
- BRIG_IMAGE_QUERY_ARRAY = 3,
- BRIG_IMAGE_QUERY_CHANNELORDER = 4,
- BRIG_IMAGE_QUERY_CHANNELTYPE = 5
+ /* Image query selector codes, consecutive 0..5. */
+ BRIG_IMAGE_QUERY_WIDTH = 0,
+ BRIG_IMAGE_QUERY_HEIGHT = 1,
+ BRIG_IMAGE_QUERY_DEPTH = 2,
+ BRIG_IMAGE_QUERY_ARRAY = 3,
+ BRIG_IMAGE_QUERY_CHANNELORDER = 4,
+ BRIG_IMAGE_QUERY_CHANNELTYPE = 5
 };
-typedef uint8_t BrigLinkage8_t;
 enum BrigLinkage {
- BRIG_LINKAGE_NONE = 0,
- BRIG_LINKAGE_PROGRAM = 1,
- BRIG_LINKAGE_MODULE = 2,
- BRIG_LINKAGE_FUNCTION = 3,
- BRIG_LINKAGE_ARG = 4
+ /* Linkage codes, consecutive 0..4; 0 is NONE. */
+ BRIG_LINKAGE_NONE = 0,
+ BRIG_LINKAGE_PROGRAM = 1,
+ BRIG_LINKAGE_MODULE = 2,
+ BRIG_LINKAGE_FUNCTION = 3,
+ BRIG_LINKAGE_ARG = 4
 };
-typedef uint8_t BrigMachineModel8_t;
 enum BrigMachineModel {
- BRIG_MACHINE_SMALL = 0,
- BRIG_MACHINE_LARGE = 1
+ /* Machine model codes: SMALL and LARGE. */
+ BRIG_MACHINE_SMALL = 0,
+ BRIG_MACHINE_LARGE = 1,
+ /* UNDEF (2) is an extra code after the two models; presumably a "not set" marker - TODO confirm. */
+ BRIG_MACHINE_UNDEF = 2
 };
-typedef uint8_t BrigMemoryModifier8_t;
 enum BrigMemoryModifierMask {
- BRIG_MEMORY_CONST = 1
+ BRIG_MEMORY_CONST = 1 /* The only mask in this enum; selects bit 0. */
 };
-typedef uint8_t BrigMemoryOrder8_t;
 enum BrigMemoryOrder {
- BRIG_MEMORY_ORDER_NONE = 0,
- BRIG_MEMORY_ORDER_RELAXED = 1,
- BRIG_MEMORY_ORDER_SC_ACQUIRE = 2,
- BRIG_MEMORY_ORDER_SC_RELEASE = 3,
- BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4
+ /* Memory order codes, consecutive 0..4; 0 is NONE. */
+ BRIG_MEMORY_ORDER_NONE = 0,
+ BRIG_MEMORY_ORDER_RELAXED = 1,
+ BRIG_MEMORY_ORDER_SC_ACQUIRE = 2,
+ BRIG_MEMORY_ORDER_SC_RELEASE = 3,
+ BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4,
+ /* LAST is hard-coded to 5, one past the final order code; keep it in step when adding entries. */
+ BRIG_MEMORY_ORDER_LAST = 5
 };
-typedef uint8_t BrigMemoryScope8_t;
 enum BrigMemoryScope {
- BRIG_MEMORY_SCOPE_NONE = 0,
- BRIG_MEMORY_SCOPE_WORKITEM = 1,
- BRIG_MEMORY_SCOPE_WAVEFRONT = 2,
- BRIG_MEMORY_SCOPE_WORKGROUP = 3,
- BRIG_MEMORY_SCOPE_COMPONENT = 4,
- BRIG_MEMORY_SCOPE_SYSTEM = 5
+ /* Memory scope codes, consecutive 0..5; 0 is NONE. */
+ BRIG_MEMORY_SCOPE_NONE = 0,
+ BRIG_MEMORY_SCOPE_WORKITEM = 1,
+ BRIG_MEMORY_SCOPE_WAVEFRONT = 2,
+ BRIG_MEMORY_SCOPE_WORKGROUP = 3,
+ BRIG_MEMORY_SCOPE_AGENT = 4,
+ BRIG_MEMORY_SCOPE_SYSTEM = 5,
+ /* LAST is hard-coded to 6, one past the final scope code; keep it in step when adding entries. */
+ BRIG_MEMORY_SCOPE_LAST = 6
 };
-typedef uint16_t BrigOpcode16_t;
 enum BrigOpcode {
- BRIG_OPCODE_NOP = 0,
- BRIG_OPCODE_ABS = 1,
- BRIG_OPCODE_ADD = 2,
- BRIG_OPCODE_BORROW = 3,
- BRIG_OPCODE_CARRY = 4,
- BRIG_OPCODE_CEIL = 5,
- BRIG_OPCODE_COPYSIGN = 6,
- BRIG_OPCODE_DIV = 7,
- BRIG_OPCODE_FLOOR = 8,
- BRIG_OPCODE_FMA = 9,
- BRIG_OPCODE_FRACT = 10,
- BRIG_OPCODE_MAD = 11,
- BRIG_OPCODE_MAX = 12,
- BRIG_OPCODE_MIN = 13,
- BRIG_OPCODE_MUL = 14,
- BRIG_OPCODE_MULHI = 15,
- BRIG_OPCODE_NEG = 16,
- BRIG_OPCODE_REM = 17,
- BRIG_OPCODE_RINT = 18,
- BRIG_OPCODE_SQRT = 19,
- BRIG_OPCODE_SUB = 20,
- BRIG_OPCODE_TRUNC = 21,
- BRIG_OPCODE_MAD24 = 22,
- BRIG_OPCODE_MAD24HI = 23,
- BRIG_OPCODE_MUL24 = 24,
- BRIG_OPCODE_MUL24HI = 25,
- BRIG_OPCODE_SHL = 26,
- BRIG_OPCODE_SHR = 27,
- BRIG_OPCODE_AND = 28,
- BRIG_OPCODE_NOT = 29,
- BRIG_OPCODE_OR = 30,
- BRIG_OPCODE_POPCOUNT = 31,
- BRIG_OPCODE_XOR = 32,
- BRIG_OPCODE_BITEXTRACT = 33,
- BRIG_OPCODE_BITINSERT = 34,
- BRIG_OPCODE_BITMASK = 35,
- BRIG_OPCODE_BITREV = 36,
- BRIG_OPCODE_BITSELECT = 37,
- BRIG_OPCODE_FIRSTBIT = 38,
- BRIG_OPCODE_LASTBIT = 39,
- BRIG_OPCODE_COMBINE = 40,
- BRIG_OPCODE_EXPAND = 41,
- BRIG_OPCODE_LDA = 42,
- BRIG_OPCODE_MOV = 43,
- BRIG_OPCODE_SHUFFLE = 44,
- BRIG_OPCODE_UNPACKHI = 45,
- BRIG_OPCODE_UNPACKLO = 46,
- BRIG_OPCODE_PACK = 47,
- BRIG_OPCODE_UNPACK = 48,
- BRIG_OPCODE_CMOV = 49,
- BRIG_OPCODE_CLASS = 50,
- BRIG_OPCODE_NCOS = 51,
- BRIG_OPCODE_NEXP2 = 52,
- BRIG_OPCODE_NFMA = 53,
- BRIG_OPCODE_NLOG2 = 54,
- BRIG_OPCODE_NRCP = 55,
- BRIG_OPCODE_NRSQRT = 56,
- BRIG_OPCODE_NSIN = 57,
- BRIG_OPCODE_NSQRT = 58,
- BRIG_OPCODE_BITALIGN = 59,
- BRIG_OPCODE_BYTEALIGN = 60,
- BRIG_OPCODE_PACKCVT = 61,
- BRIG_OPCODE_UNPACKCVT = 62,
- BRIG_OPCODE_LERP = 63,
- BRIG_OPCODE_SAD = 64,
- BRIG_OPCODE_SADHI = 65,
- BRIG_OPCODE_SEGMENTP = 66,
- BRIG_OPCODE_FTOS = 67,
- BRIG_OPCODE_STOF = 68,
- BRIG_OPCODE_CMP = 69,
- BRIG_OPCODE_CVT = 70,
- BRIG_OPCODE_LD = 71,
- BRIG_OPCODE_ST = 72,
- BRIG_OPCODE_ATOMIC = 73,
- BRIG_OPCODE_ATOMICNORET = 74,
- BRIG_OPCODE_SIGNAL = 75,
- BRIG_OPCODE_SIGNALNORET = 76,
- BRIG_OPCODE_MEMFENCE = 77,
- BRIG_OPCODE_RDIMAGE = 78,
- BRIG_OPCODE_LDIMAGE = 79,
- BRIG_OPCODE_STIMAGE = 80,
- BRIG_OPCODE_QUERYIMAGE = 81,
- BRIG_OPCODE_QUERYSAMPLER = 82,
- BRIG_OPCODE_CBR = 83,
- BRIG_OPCODE_BR = 84,
- BRIG_OPCODE_SBR = 85,
- BRIG_OPCODE_BARRIER = 86,
- BRIG_OPCODE_WAVEBARRIER = 87,
- BRIG_OPCODE_ARRIVEFBAR = 88,
- BRIG_OPCODE_INITFBAR = 89,
- BRIG_OPCODE_JOINFBAR = 90,
- BRIG_OPCODE_LEAVEFBAR = 91,
- BRIG_OPCODE_RELEASEFBAR = 92,
- BRIG_OPCODE_WAITFBAR = 93,
- BRIG_OPCODE_LDF = 94,
- BRIG_OPCODE_ACTIVELANECOUNT = 95,
- BRIG_OPCODE_ACTIVELANEID = 96,
- BRIG_OPCODE_ACTIVELANEMASK = 97,
- BRIG_OPCODE_ACTIVELANESHUFFLE = 98,
- BRIG_OPCODE_CALL = 99,
- BRIG_OPCODE_SCALL = 100,
- BRIG_OPCODE_ICALL = 101,
- BRIG_OPCODE_LDI = 102,
- BRIG_OPCODE_RET = 103,
- BRIG_OPCODE_ALLOCA = 104,
- BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
- BRIG_OPCODE_DIM = 106,
- BRIG_OPCODE_GRIDGROUPS = 107,
- BRIG_OPCODE_GRIDSIZE = 108,
- BRIG_OPCODE_PACKETCOMPLETIONSIG = 109,
- BRIG_OPCODE_PACKETID = 110,
- BRIG_OPCODE_WORKGROUPID = 111,
- BRIG_OPCODE_WORKGROUPSIZE = 112,
- BRIG_OPCODE_WORKITEMABSID = 113,
- BRIG_OPCODE_WORKITEMFLATABSID = 114,
- BRIG_OPCODE_WORKITEMFLATID = 115,
- BRIG_OPCODE_WORKITEMID = 116,
- BRIG_OPCODE_CLEARDETECTEXCEPT = 117,
- BRIG_OPCODE_GETDETECTEXCEPT = 118,
- BRIG_OPCODE_SETDETECTEXCEPT = 119,
- BRIG_OPCODE_ADDQUEUEWRITEINDEX = 120,
- BRIG_OPCODE_AGENTCOUNT = 121,
- BRIG_OPCODE_AGENTID = 122,
- BRIG_OPCODE_CASQUEUEWRITEINDEX = 123,
- BRIG_OPCODE_LDK = 124,
- BRIG_OPCODE_LDQUEUEREADINDEX = 125,
- BRIG_OPCODE_LDQUEUEWRITEINDEX = 126,
- BRIG_OPCODE_QUEUEID = 127,
- BRIG_OPCODE_QUEUEPTR = 128,
- BRIG_OPCODE_STQUEUEREADINDEX = 129,
- BRIG_OPCODE_STQUEUEWRITEINDEX = 130,
- BRIG_OPCODE_CLOCK = 131,
- BRIG_OPCODE_CUID = 132,
- BRIG_OPCODE_DEBUGTRAP = 133,
- BRIG_OPCODE_GROUPBASEPTR = 134,
- BRIG_OPCODE_KERNARGBASEPTR = 135,
- BRIG_OPCODE_LANEID = 136,
- BRIG_OPCODE_MAXCUID = 137,
- BRIG_OPCODE_MAXWAVEID = 138,
- BRIG_OPCODE_NULLPTR = 139,
- BRIG_OPCODE_WAVEID = 140
+ /* Opcodes 0..136 are consecutive; codes from BRIG_OPCODE_FIRST_USER_DEFINED (32768) upward hold the GCN entries at the end of this enum. */
+ BRIG_OPCODE_NOP = 0,
+ BRIG_OPCODE_ABS = 1,
+ BRIG_OPCODE_ADD = 2,
+ BRIG_OPCODE_BORROW = 3,
+ BRIG_OPCODE_CARRY = 4,
+ BRIG_OPCODE_CEIL = 5,
+ BRIG_OPCODE_COPYSIGN = 6,
+ BRIG_OPCODE_DIV = 7,
+ BRIG_OPCODE_FLOOR = 8,
+ BRIG_OPCODE_FMA = 9,
+ BRIG_OPCODE_FRACT = 10,
+ BRIG_OPCODE_MAD = 11,
+ BRIG_OPCODE_MAX = 12,
+ BRIG_OPCODE_MIN = 13,
+ BRIG_OPCODE_MUL = 14,
+ BRIG_OPCODE_MULHI = 15,
+ BRIG_OPCODE_NEG = 16,
+ BRIG_OPCODE_REM = 17,
+ BRIG_OPCODE_RINT = 18,
+ BRIG_OPCODE_SQRT = 19,
+ BRIG_OPCODE_SUB = 20,
+ BRIG_OPCODE_TRUNC = 21,
+ BRIG_OPCODE_MAD24 = 22,
+ BRIG_OPCODE_MAD24HI = 23,
+ BRIG_OPCODE_MUL24 = 24,
+ BRIG_OPCODE_MUL24HI = 25,
+ BRIG_OPCODE_SHL = 26,
+ BRIG_OPCODE_SHR = 27,
+ BRIG_OPCODE_AND = 28,
+ BRIG_OPCODE_NOT = 29,
+ BRIG_OPCODE_OR = 30,
+ BRIG_OPCODE_POPCOUNT = 31,
+ BRIG_OPCODE_XOR = 32,
+ BRIG_OPCODE_BITEXTRACT = 33,
+ BRIG_OPCODE_BITINSERT = 34,
+ BRIG_OPCODE_BITMASK = 35,
+ BRIG_OPCODE_BITREV = 36,
+ BRIG_OPCODE_BITSELECT = 37,
+ BRIG_OPCODE_FIRSTBIT = 38,
+ BRIG_OPCODE_LASTBIT = 39,
+ BRIG_OPCODE_COMBINE = 40,
+ BRIG_OPCODE_EXPAND = 41,
+ BRIG_OPCODE_LDA = 42,
+ BRIG_OPCODE_MOV = 43,
+ BRIG_OPCODE_SHUFFLE = 44,
+ BRIG_OPCODE_UNPACKHI = 45,
+ BRIG_OPCODE_UNPACKLO = 46,
+ BRIG_OPCODE_PACK = 47,
+ BRIG_OPCODE_UNPACK = 48,
+ BRIG_OPCODE_CMOV = 49,
+ BRIG_OPCODE_CLASS = 50,
+ BRIG_OPCODE_NCOS = 51,
+ BRIG_OPCODE_NEXP2 = 52,
+ BRIG_OPCODE_NFMA = 53,
+ BRIG_OPCODE_NLOG2 = 54,
+ BRIG_OPCODE_NRCP = 55,
+ BRIG_OPCODE_NRSQRT = 56,
+ BRIG_OPCODE_NSIN = 57,
+ BRIG_OPCODE_NSQRT = 58,
+ BRIG_OPCODE_BITALIGN = 59,
+ BRIG_OPCODE_BYTEALIGN = 60,
+ BRIG_OPCODE_PACKCVT = 61,
+ BRIG_OPCODE_UNPACKCVT = 62,
+ BRIG_OPCODE_LERP = 63,
+ BRIG_OPCODE_SAD = 64,
+ BRIG_OPCODE_SADHI = 65,
+ BRIG_OPCODE_SEGMENTP = 66,
+ BRIG_OPCODE_FTOS = 67,
+ BRIG_OPCODE_STOF = 68,
+ BRIG_OPCODE_CMP = 69,
+ BRIG_OPCODE_CVT = 70,
+ BRIG_OPCODE_LD = 71,
+ BRIG_OPCODE_ST = 72,
+ BRIG_OPCODE_ATOMIC = 73,
+ BRIG_OPCODE_ATOMICNORET = 74,
+ BRIG_OPCODE_SIGNAL = 75,
+ BRIG_OPCODE_SIGNALNORET = 76,
+ BRIG_OPCODE_MEMFENCE = 77,
+ BRIG_OPCODE_RDIMAGE = 78,
+ BRIG_OPCODE_LDIMAGE = 79,
+ BRIG_OPCODE_STIMAGE = 80,
+ BRIG_OPCODE_IMAGEFENCE = 81,
+ BRIG_OPCODE_QUERYIMAGE = 82,
+ BRIG_OPCODE_QUERYSAMPLER = 83,
+ BRIG_OPCODE_CBR = 84,
+ BRIG_OPCODE_BR = 85,
+ BRIG_OPCODE_SBR = 86,
+ BRIG_OPCODE_BARRIER = 87,
+ BRIG_OPCODE_WAVEBARRIER = 88,
+ BRIG_OPCODE_ARRIVEFBAR = 89,
+ BRIG_OPCODE_INITFBAR = 90,
+ BRIG_OPCODE_JOINFBAR = 91,
+ BRIG_OPCODE_LEAVEFBAR = 92,
+ BRIG_OPCODE_RELEASEFBAR = 93,
+ BRIG_OPCODE_WAITFBAR = 94,
+ BRIG_OPCODE_LDF = 95,
+ BRIG_OPCODE_ACTIVELANECOUNT = 96,
+ BRIG_OPCODE_ACTIVELANEID = 97,
+ BRIG_OPCODE_ACTIVELANEMASK = 98,
+ BRIG_OPCODE_ACTIVELANEPERMUTE = 99,
+ BRIG_OPCODE_CALL = 100,
+ BRIG_OPCODE_SCALL = 101,
+ BRIG_OPCODE_ICALL = 102,
+ BRIG_OPCODE_RET = 103,
+ BRIG_OPCODE_ALLOCA = 104,
+ BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
+ BRIG_OPCODE_CURRENTWORKITEMFLATID = 106,
+ BRIG_OPCODE_DIM = 107,
+ BRIG_OPCODE_GRIDGROUPS = 108,
+ BRIG_OPCODE_GRIDSIZE = 109,
+ BRIG_OPCODE_PACKETCOMPLETIONSIG = 110,
+ BRIG_OPCODE_PACKETID = 111,
+ BRIG_OPCODE_WORKGROUPID = 112,
+ BRIG_OPCODE_WORKGROUPSIZE = 113,
+ BRIG_OPCODE_WORKITEMABSID = 114,
+ BRIG_OPCODE_WORKITEMFLATABSID = 115,
+ BRIG_OPCODE_WORKITEMFLATID = 116,
+ BRIG_OPCODE_WORKITEMID = 117,
+ BRIG_OPCODE_CLEARDETECTEXCEPT = 118,
+ BRIG_OPCODE_GETDETECTEXCEPT = 119,
+ BRIG_OPCODE_SETDETECTEXCEPT = 120,
+ BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121,
+ BRIG_OPCODE_CASQUEUEWRITEINDEX = 122,
+ BRIG_OPCODE_LDQUEUEREADINDEX = 123,
+ BRIG_OPCODE_LDQUEUEWRITEINDEX = 124,
+ BRIG_OPCODE_STQUEUEREADINDEX = 125,
+ BRIG_OPCODE_STQUEUEWRITEINDEX = 126,
+ BRIG_OPCODE_CLOCK = 127,
+ BRIG_OPCODE_CUID = 128,
+ BRIG_OPCODE_DEBUGTRAP = 129,
+ BRIG_OPCODE_GROUPBASEPTR = 130,
+ BRIG_OPCODE_KERNARGBASEPTR = 131,
+ BRIG_OPCODE_LANEID = 132,
+ BRIG_OPCODE_MAXCUID = 133,
+ BRIG_OPCODE_MAXWAVEID = 134,
+ BRIG_OPCODE_NULLPTR = 135,
+ BRIG_OPCODE_WAVEID = 136,
+ BRIG_OPCODE_FIRST_USER_DEFINED = 32768,
+ /* GCN opcodes are (1u << 15) | index with index 0..30; 1u << 15 == 32768, so GCNMADU has the same value as FIRST_USER_DEFINED. */
+ BRIG_OPCODE_GCNMADU = (1u << 15) | 0,
+ BRIG_OPCODE_GCNMADS = (1u << 15) | 1,
+ BRIG_OPCODE_GCNMAX3 = (1u << 15) | 2,
+ BRIG_OPCODE_GCNMIN3 = (1u << 15) | 3,
+ BRIG_OPCODE_GCNMED3 = (1u << 15) | 4,
+ BRIG_OPCODE_GCNFLDEXP = (1u << 15) | 5,
+ BRIG_OPCODE_GCNFREXP_EXP = (1u << 15) | 6,
+ BRIG_OPCODE_GCNFREXP_MANT = (1u << 15) | 7,
+ BRIG_OPCODE_GCNTRIG_PREOP = (1u << 15) | 8,
+ BRIG_OPCODE_GCNBFM = (1u << 15) | 9,
+ BRIG_OPCODE_GCNLD = (1u << 15) | 10,
+ BRIG_OPCODE_GCNST = (1u << 15) | 11,
+ BRIG_OPCODE_GCNATOMIC = (1u << 15) | 12,
+ BRIG_OPCODE_GCNATOMICNORET = (1u << 15) | 13,
+ BRIG_OPCODE_GCNSLEEP = (1u << 15) | 14,
+ BRIG_OPCODE_GCNPRIORITY = (1u << 15) | 15,
+ BRIG_OPCODE_GCNREGIONALLOC = (1u << 15) | 16,
+ BRIG_OPCODE_GCNMSAD = (1u << 15) | 17,
+ BRIG_OPCODE_GCNQSAD = (1u << 15) | 18,
+ BRIG_OPCODE_GCNMQSAD = (1u << 15) | 19,
+ BRIG_OPCODE_GCNMQSAD4 = (1u << 15) | 20,
+ BRIG_OPCODE_GCNSADW = (1u << 15) | 21,
+ BRIG_OPCODE_GCNSADD = (1u << 15) | 22,
+ BRIG_OPCODE_GCNCONSUME = (1u << 15) | 23,
+ BRIG_OPCODE_GCNAPPEND = (1u << 15) | 24,
+ BRIG_OPCODE_GCNB4XCHG = (1u << 15) | 25,
+ BRIG_OPCODE_GCNB32XCHG = (1u << 15) | 26,
+ BRIG_OPCODE_GCNMAX = (1u << 15) | 27,
+ BRIG_OPCODE_GCNMIN = (1u << 15) | 28,
+ BRIG_OPCODE_GCNDIVRELAXED = (1u << 15) | 29,
+ BRIG_OPCODE_GCNDIVRELAXEDNARROW = (1u << 15) | 30
 };
-typedef uint8_t BrigPack8_t;
 enum BrigPack {
- BRIG_PACK_NONE = 0,
- BRIG_PACK_PP = 1,
- BRIG_PACK_PS = 2,
- BRIG_PACK_SP = 3,
- BRIG_PACK_SS = 4,
- BRIG_PACK_S = 5,
- BRIG_PACK_P = 6,
- BRIG_PACK_PPSAT = 7,
- BRIG_PACK_PSSAT = 8,
- BRIG_PACK_SPSAT = 9,
- BRIG_PACK_SSSAT = 10,
- BRIG_PACK_SSAT = 11,
- BRIG_PACK_PSAT = 12
+ /* Packing codes 0..12; each *SAT code (7..12) equals its non-SAT counterpart (1..6) plus 6. */
+ BRIG_PACK_NONE = 0,
+ BRIG_PACK_PP = 1,
+ BRIG_PACK_PS = 2,
+ BRIG_PACK_SP = 3,
+ BRIG_PACK_SS = 4,
+ BRIG_PACK_S = 5,
+ BRIG_PACK_P = 6,
+ BRIG_PACK_PPSAT = 7,
+ BRIG_PACK_PSSAT = 8,
+ BRIG_PACK_SPSAT = 9,
+ BRIG_PACK_SSSAT = 10,
+ BRIG_PACK_SSAT = 11,
+ BRIG_PACK_PSAT = 12
 };
-typedef uint8_t BrigProfile8_t;
 enum BrigProfile {
- BRIG_PROFILE_BASE = 0,
- BRIG_PROFILE_FULL = 1
+ /* Profile codes: BASE and FULL. */
+ BRIG_PROFILE_BASE = 0,
+ BRIG_PROFILE_FULL = 1,
+ /* UNDEF (2) is an extra code after the two profiles; presumably a "not set" marker - TODO confirm. */
+ BRIG_PROFILE_UNDEF = 2
 };
-typedef uint16_t BrigRegisterKind16_t;
 enum BrigRegisterKind {
- BRIG_REGISTER_CONTROL = 0,
- BRIG_REGISTER_SINGLE = 1,
- BRIG_REGISTER_DOUBLE = 2,
- BRIG_REGISTER_QUAD = 3
+ /* Register kind codes, consecutive 0..3. */
+ BRIG_REGISTER_KIND_CONTROL = 0,
+ BRIG_REGISTER_KIND_SINGLE = 1,
+ BRIG_REGISTER_KIND_DOUBLE = 2,
+ BRIG_REGISTER_KIND_QUAD = 3
 };
-typedef uint8_t BrigRound8_t;
 enum BrigRound {
- BRIG_ROUND_NONE = 0,
- BRIG_ROUND_FLOAT_NEAR_EVEN = 1,
- BRIG_ROUND_FLOAT_ZERO = 2,
- BRIG_ROUND_FLOAT_PLUS_INFINITY = 3,
- BRIG_ROUND_FLOAT_MINUS_INFINITY = 4,
- BRIG_ROUND_INTEGER_NEAR_EVEN = 5,
- BRIG_ROUND_INTEGER_ZERO = 6,
- BRIG_ROUND_INTEGER_PLUS_INFINITY = 7,
- BRIG_ROUND_INTEGER_MINUS_INFINITY = 8,
- BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 9,
- BRIG_ROUND_INTEGER_ZERO_SAT = 10,
- BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 11,
- BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 12,
- BRIG_ROUND_INTEGER_SIGNALLING_NEAR_EVEN = 13,
- BRIG_ROUND_INTEGER_SIGNALLING_ZERO = 14,
- BRIG_ROUND_INTEGER_SIGNALLING_PLUS_INFINITY = 15,
- BRIG_ROUND_INTEGER_SIGNALLING_MINUS_INFINITY = 16,
- BRIG_ROUND_INTEGER_SIGNALLING_NEAR_EVEN_SAT = 17,
- BRIG_ROUND_INTEGER_SIGNALLING_ZERO_SAT = 18,
- BRIG_ROUND_INTEGER_SIGNALLING_PLUS_INFINITY_SAT = 19,
- BRIG_ROUND_INTEGER_SIGNALLING_MINUS_INFINITY_SAT = 20
+ /* Rounding codes, consecutive 0..21: 1..5 are FLOAT_* modes, 6..13 INTEGER_* modes, 14..21 INTEGER_SIGNALING_* modes. */
+ BRIG_ROUND_NONE = 0,
+ BRIG_ROUND_FLOAT_DEFAULT = 1,
+ BRIG_ROUND_FLOAT_NEAR_EVEN = 2,
+ BRIG_ROUND_FLOAT_ZERO = 3,
+ BRIG_ROUND_FLOAT_PLUS_INFINITY = 4,
+ BRIG_ROUND_FLOAT_MINUS_INFINITY = 5,
+ BRIG_ROUND_INTEGER_NEAR_EVEN = 6,
+ BRIG_ROUND_INTEGER_ZERO = 7,
+ BRIG_ROUND_INTEGER_PLUS_INFINITY = 8,
+ BRIG_ROUND_INTEGER_MINUS_INFINITY = 9,
+ BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10,
+ BRIG_ROUND_INTEGER_ZERO_SAT = 11,
+ BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12,
+ BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13,
+ BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14,
+ BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15,
+ BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16,
+ BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17,
+ BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18,
+ BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19,
+ BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20,
+ BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21
 };
-typedef uint8_t BrigSamplerAddressing8_t;
 enum BrigSamplerAddressing {
- BRIG_ADDRESSING_UNDEFINED = 0,
- BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
- BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
- BRIG_ADDRESSING_REPEAT = 3,
- BRIG_ADDRESSING_MIRRORED_REPEAT = 4
+ /* Sampler addressing codes, consecutive 0..4; 0 is UNDEFINED. */
+ BRIG_ADDRESSING_UNDEFINED = 0,
+ BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
+ BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
+ BRIG_ADDRESSING_REPEAT = 3,
+ BRIG_ADDRESSING_MIRRORED_REPEAT = 4,
+ /* FIRST_USER_DEFINED is 128, the upper half of the 8-bit BrigSamplerAddressing8_t range. */
+ BRIG_ADDRESSING_FIRST_USER_DEFINED = 128
 };
-typedef uint8_t BrigSamplerCoordNormalization8_t;
 enum BrigSamplerCoordNormalization {
- BRIG_COORD_UNNORMALIZED = 0,
- BRIG_COORD_NORMALIZED = 1
+ /* Two-valued code: 0 = UNNORMALIZED, 1 = NORMALIZED. */
+ BRIG_COORD_UNNORMALIZED = 0,
+ BRIG_COORD_NORMALIZED = 1
 };
-typedef uint8_t BrigSamplerFilter8_t;
 enum BrigSamplerFilter {
- BRIG_FILTER_NEAREST = 0,
- BRIG_FILTER_LINEAR = 1
+ /* Sampler filter codes: NEAREST and LINEAR. */
+ BRIG_FILTER_NEAREST = 0,
+ BRIG_FILTER_LINEAR = 1,
+ /* FIRST_USER_DEFINED is 128, the upper half of the 8-bit BrigSamplerFilter8_t range. */
+ BRIG_FILTER_FIRST_USER_DEFINED = 128
 };
-typedef uint8_t BrigSamplerQuery8_t;
 enum BrigSamplerQuery {
- BRIG_SAMPLER_QUERY_ADDRESSING = 0,
- BRIG_SAMPLER_QUERY_COORD = 1,
- BRIG_SAMPLER_QUERY_FILTER = 2
+ /* Sampler query selector codes, consecutive 0..2. */
+ BRIG_SAMPLER_QUERY_ADDRESSING = 0,
+ BRIG_SAMPLER_QUERY_COORD = 1,
+ BRIG_SAMPLER_QUERY_FILTER = 2
 };
-typedef uint32_t BrigSectionIndex32_t;
 enum BrigSectionIndex {
- BRIG_SECTION_INDEX_DATA = 0,
- BRIG_SECTION_INDEX_CODE = 1,
- BRIG_SECTION_INDEX_OPERAND = 2,
- BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3
-};
-struct BrigSectionHeader {
- uint32_t byteCount;
- uint32_t headerByteCount;
- uint32_t nameLength;
- uint8_t name[1];
+ BRIG_SECTION_INDEX_DATA = 0,
+ BRIG_SECTION_INDEX_CODE = 1,
+ BRIG_SECTION_INDEX_OPERAND = 2,
+ BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3,
+ /* Alias: same value (3) as BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED. */
+ BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED = BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED
 };
-typedef uint8_t BrigSegCvtModifier8_t;
 enum BrigSegCvtModifierMask {
- BRIG_SEG_CVT_NONULL = 1
+ BRIG_SEG_CVT_NONULL = 1 /* The only mask in this enum; selects bit 0. */
 };
-typedef uint8_t BrigSegment8_t;
enum BrigSegment {
- BRIG_SEGMENT_NONE = 0,
- BRIG_SEGMENT_FLAT = 1,
- BRIG_SEGMENT_GLOBAL = 2,
- BRIG_SEGMENT_READONLY = 3,
- BRIG_SEGMENT_KERNARG = 4,
- BRIG_SEGMENT_GROUP = 5,
- BRIG_SEGMENT_PRIVATE = 6,
- BRIG_SEGMENT_SPILL = 7,
- BRIG_SEGMENT_ARG = 8
-};
-
-enum {
- BRIG_TYPE_PACK_SHIFT = 5,
- BRIG_TYPE_BASE_MASK = (1 << BRIG_TYPE_PACK_SHIFT) - 1,
- BRIG_TYPE_PACK_MASK = 3 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT
+
+ BRIG_SEGMENT_NONE = 0,
+ BRIG_SEGMENT_FLAT = 1,
+ BRIG_SEGMENT_GLOBAL = 2,
+ BRIG_SEGMENT_READONLY = 3,
+ BRIG_SEGMENT_KERNARG = 4,
+ BRIG_SEGMENT_GROUP = 5,
+ BRIG_SEGMENT_PRIVATE = 6,
+ BRIG_SEGMENT_SPILL = 7,
+ BRIG_SEGMENT_ARG = 8,
+
+ BRIG_SEGMENT_FIRST_USER_DEFINED = 128,
+
+ BRIG_SEGMENT_AMD_GCN = 9
};
-typedef uint16_t BrigType16_t;
-enum BrigType {
- BRIG_TYPE_NONE = 0,
- BRIG_TYPE_U8 = 1,
- BRIG_TYPE_U16 = 2,
- BRIG_TYPE_U32 = 3,
- BRIG_TYPE_U64 = 4,
- BRIG_TYPE_S8 = 5,
- BRIG_TYPE_S16 = 6,
- BRIG_TYPE_S32 = 7,
- BRIG_TYPE_S64 = 8,
- BRIG_TYPE_F16 = 9,
- BRIG_TYPE_F32 = 10,
- BRIG_TYPE_F64 = 11,
- BRIG_TYPE_B1 = 12,
- BRIG_TYPE_B8 = 13,
- BRIG_TYPE_B16 = 14,
- BRIG_TYPE_B32 = 15,
- BRIG_TYPE_B64 = 16,
- BRIG_TYPE_B128 = 17,
- BRIG_TYPE_SAMP = 18,
- BRIG_TYPE_ROIMG = 19,
- BRIG_TYPE_WOIMG = 20,
- BRIG_TYPE_RWIMG = 21,
- BRIG_TYPE_SIG32 = 22,
- BRIG_TYPE_SIG64 = 23,
- BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128
+enum BrigPackedTypeBits {
+
+ BRIG_TYPE_BASE_SIZE = 5,
+ BRIG_TYPE_PACK_SIZE = 2,
+ BRIG_TYPE_ARRAY_SIZE = 1,
+
+ BRIG_TYPE_BASE_SHIFT = 0,
+ BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE,
+ BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE,
+
+ BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1) << BRIG_TYPE_BASE_SHIFT,
+ BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1) << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT,
+
+ BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT,
+
+ BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT
};
-struct BrigUInt64 {
- uint32_t lo;
- uint32_t hi;
+enum BrigType {
+
+ BRIG_TYPE_NONE = 0,
+ BRIG_TYPE_U8 = 1,
+ BRIG_TYPE_U16 = 2,
+ BRIG_TYPE_U32 = 3,
+ BRIG_TYPE_U64 = 4,
+ BRIG_TYPE_S8 = 5,
+ BRIG_TYPE_S16 = 6,
+ BRIG_TYPE_S32 = 7,
+ BRIG_TYPE_S64 = 8,
+ BRIG_TYPE_F16 = 9,
+ BRIG_TYPE_F32 = 10,
+ BRIG_TYPE_F64 = 11,
+ BRIG_TYPE_B1 = 12,
+ BRIG_TYPE_B8 = 13,
+ BRIG_TYPE_B16 = 14,
+ BRIG_TYPE_B32 = 15,
+ BRIG_TYPE_B64 = 16,
+ BRIG_TYPE_B128 = 17,
+ BRIG_TYPE_SAMP = 18,
+ BRIG_TYPE_ROIMG = 19,
+ BRIG_TYPE_WOIMG = 20,
+ BRIG_TYPE_RWIMG = 21,
+ BRIG_TYPE_SIG32 = 22,
+ BRIG_TYPE_SIG64 = 23,
+
+ BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32,
+ BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128,
+ BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32,
+ BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128,
+ BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128,
+ BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128,
+ BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32,
+ BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128,
+ BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32,
+ BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128,
+ BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128,
+ BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128,
+ BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32,
+ BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128,
+ BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128,
+ BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_INVALID = (unsigned) -1
};
-typedef uint8_t BrigVariableModifier8_t;
enum BrigVariableModifierMask {
- BRIG_SYMBOL_DECLARATION = 0,
- BRIG_SYMBOL_DEFINITION = 1,
- BRIG_SYMBOL_CONST = 2,
- BRIG_SYMBOL_ARRAY = 4,
- BRIG_SYMBOL_FLEX_ARRAY = 8
-};
-typedef uint32_t BrigVersion32_t;
-enum BrigVersion {
- BRIG_VERSION_HSAIL_MAJOR = 0,
- BRIG_VERSION_HSAIL_MINOR = 99,
- BRIG_VERSION_BRIG_MAJOR = 0,
- BRIG_VERSION_BRIG_MINOR = 99
+ BRIG_VARIABLE_DEFINITION = 1,
+ BRIG_VARIABLE_CONST = 2
};
-typedef uint8_t BrigWidth8_t;
enum BrigWidth {
- BRIG_WIDTH_NONE = 0,
- BRIG_WIDTH_1 = 1,
- BRIG_WIDTH_2 = 2,
- BRIG_WIDTH_4 = 3,
- BRIG_WIDTH_8 = 4,
- BRIG_WIDTH_16 = 5,
- BRIG_WIDTH_32 = 6,
- BRIG_WIDTH_64 = 7,
- BRIG_WIDTH_128 = 8,
- BRIG_WIDTH_256 = 9,
- BRIG_WIDTH_512 = 10,
- BRIG_WIDTH_1024 = 11,
- BRIG_WIDTH_2048 = 12,
- BRIG_WIDTH_4096 = 13,
- BRIG_WIDTH_8192 = 14,
- BRIG_WIDTH_16384 = 15,
- BRIG_WIDTH_32768 = 16,
- BRIG_WIDTH_65536 = 17,
- BRIG_WIDTH_131072 = 18,
- BRIG_WIDTH_262144 = 19,
- BRIG_WIDTH_524288 = 20,
- BRIG_WIDTH_1048576 = 21,
- BRIG_WIDTH_2097152 = 22,
- BRIG_WIDTH_4194304 = 23,
- BRIG_WIDTH_8388608 = 24,
- BRIG_WIDTH_16777216 = 25,
- BRIG_WIDTH_33554432 = 26,
- BRIG_WIDTH_67108864 = 27,
- BRIG_WIDTH_134217728 = 28,
- BRIG_WIDTH_268435456 = 29,
- BRIG_WIDTH_536870912 = 30,
- BRIG_WIDTH_1073741824 = 31,
- BRIG_WIDTH_2147483648 = 32,
- BRIG_WIDTH_WAVESIZE = 33,
- BRIG_WIDTH_ALL = 34
+
+ BRIG_WIDTH_NONE = 0,
+ BRIG_WIDTH_1 = 1,
+ BRIG_WIDTH_2 = 2,
+ BRIG_WIDTH_4 = 3,
+ BRIG_WIDTH_8 = 4,
+ BRIG_WIDTH_16 = 5,
+ BRIG_WIDTH_32 = 6,
+ BRIG_WIDTH_64 = 7,
+ BRIG_WIDTH_128 = 8,
+ BRIG_WIDTH_256 = 9,
+ BRIG_WIDTH_512 = 10,
+ BRIG_WIDTH_1024 = 11,
+ BRIG_WIDTH_2048 = 12,
+ BRIG_WIDTH_4096 = 13,
+ BRIG_WIDTH_8192 = 14,
+ BRIG_WIDTH_16384 = 15,
+ BRIG_WIDTH_32768 = 16,
+ BRIG_WIDTH_65536 = 17,
+ BRIG_WIDTH_131072 = 18,
+ BRIG_WIDTH_262144 = 19,
+ BRIG_WIDTH_524288 = 20,
+ BRIG_WIDTH_1048576 = 21,
+ BRIG_WIDTH_2097152 = 22,
+ BRIG_WIDTH_4194304 = 23,
+ BRIG_WIDTH_8388608 = 24,
+ BRIG_WIDTH_16777216 = 25,
+ BRIG_WIDTH_33554432 = 26,
+ BRIG_WIDTH_67108864 = 27,
+ BRIG_WIDTH_134217728 = 28,
+ BRIG_WIDTH_268435456 = 29,
+ BRIG_WIDTH_536870912 = 30,
+ BRIG_WIDTH_1073741824 = 31,
+ BRIG_WIDTH_2147483648 = 32,
+ BRIG_WIDTH_WAVESIZE = 33,
+ BRIG_WIDTH_ALL = 34,
+
+ BRIG_WIDTH_LAST
};
-struct BrigData {
- uint32_t byteCount;
- uint8_t bytes[1];
+struct BrigUInt64 {
+ uint32_t lo;
+ uint32_t hi;
+
+};
+
+struct BrigAluModifier {
+ BrigAluModifier8_t allBits;
+
};
struct BrigBase {
- uint16_t byteCount;
- BrigKinds16_t kind;
+ uint16_t byteCount;
+ BrigKind16_t kind;
+};
+
+struct BrigData {
+
+ uint32_t byteCount;
+ uint8_t bytes[1];
+};
+
+struct BrigExecutableModifier {
+ BrigExecutableModifier8_t allBits;
+
+};
+
+struct BrigMemoryModifier {
+ BrigMemoryModifier8_t allBits;
+
};
-struct BrigDirectiveArgBlock {
- BrigBase base;
+struct BrigSegCvtModifier {
+ BrigSegCvtModifier8_t allBits;
+
+};
+
+struct BrigVariableModifier {
+ BrigVariableModifier8_t allBits;
+
+};
+
+struct BrigDirectiveArgBlockEnd {
+ BrigBase base;
+};
+
+struct BrigDirectiveArgBlockStart {
+ BrigBase base;
};
struct BrigDirectiveComment {
- BrigBase base;
- BrigDataOffsetString32_t name;
+ BrigBase base;
+ BrigDataOffsetString32_t name;
};
struct BrigDirectiveControl {
- BrigBase base;
- BrigControlDirective16_t control;
- uint16_t reserved;
- BrigDataOffsetOperandList32_t operands;
+ BrigBase base;
+ BrigControlDirective16_t control;
+ uint16_t reserved;
+ BrigDataOffsetOperandList32_t operands;
};
struct BrigDirectiveExecutable {
- BrigBase base;
- BrigDataOffsetString32_t name;
- uint16_t outArgCount;
- uint16_t inArgCount;
- BrigCodeOffset32_t firstInArg;
- BrigCodeOffset32_t firstCodeBlockEntry;
- BrigCodeOffset32_t nextModuleEntry;
- uint32_t codeBlockEntryCount;
- BrigExecutableModifier8_t modifier;
- BrigLinkage8_t linkage;
- uint16_t reserved;
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ uint16_t outArgCount;
+ uint16_t inArgCount;
+ BrigCodeOffset32_t firstInArg;
+ BrigCodeOffset32_t firstCodeBlockEntry;
+ BrigCodeOffset32_t nextModuleEntry;
+ BrigExecutableModifier modifier;
+ BrigLinkage8_t linkage;
+ uint16_t reserved;
};
struct BrigDirectiveExtension {
- BrigBase base;
- BrigDataOffsetString32_t name;
+ BrigBase base;
+ BrigDataOffsetString32_t name;
};
struct BrigDirectiveFbarrier {
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigExecutableModifier8_t modifier;
- BrigLinkage8_t linkage;
- uint16_t reserved;
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ BrigVariableModifier modifier;
+ BrigLinkage8_t linkage;
+ uint16_t reserved;
};
struct BrigDirectiveLabel {
- BrigBase base;
- BrigDataOffsetString32_t name;
+ BrigBase base;
+ BrigDataOffsetString32_t name;
};
struct BrigDirectiveLoc {
- BrigBase base;
- BrigDataOffsetString32_t filename;
- uint32_t line;
- uint32_t column;
+ BrigBase base;
+ BrigDataOffsetString32_t filename;
+ uint32_t line;
+ uint32_t column;
};
struct BrigDirectiveNone {
- BrigBase base;
+ BrigBase base;
};
struct BrigDirectivePragma {
- BrigBase base;
- BrigDataOffsetOperandList32_t operands;
+ BrigBase base;
+ BrigDataOffsetOperandList32_t operands;
};
struct BrigDirectiveVariable {
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigOperandOffset32_t init;
- BrigType16_t type;
- BrigSegment8_t segment;
- BrigAlignment8_t align;
- BrigUInt64 dim;
- BrigVariableModifier8_t modifier;
- BrigLinkage8_t linkage;
- BrigAllocation8_t allocation;
- uint8_t reserved;
-};
-
-struct BrigDirectiveVersion {
- BrigBase base;
- BrigVersion32_t hsailMajor;
- BrigVersion32_t hsailMinor;
- BrigVersion32_t brigMajor;
- BrigVersion32_t brigMinor;
- BrigProfile8_t profile;
- BrigMachineModel8_t machineModel;
- uint16_t reserved;
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ BrigOperandOffset32_t init;
+ BrigType16_t type;
+
+ BrigSegment8_t segment;
+ BrigAlignment8_t align;
+ BrigUInt64 dim;
+ BrigVariableModifier modifier;
+ BrigLinkage8_t linkage;
+ BrigAllocation8_t allocation;
+ uint8_t reserved;
+};
+
+struct BrigDirectiveModule {
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ BrigVersion32_t hsailMajor;
+ BrigVersion32_t hsailMinor;
+ BrigProfile8_t profile;
+ BrigMachineModel8_t machineModel;
+ BrigRound8_t defaultFloatRound;
+ uint8_t reserved;
};
struct BrigInstBase {
- BrigBase base;
- BrigOpcode16_t opcode;
- BrigType16_t type;
- BrigDataOffsetOperandList32_t operands;
+ BrigBase base;
+ BrigOpcode16_t opcode;
+ BrigType16_t type;
+ BrigDataOffsetOperandList32_t operands;
+
};
struct BrigInstAddr {
- BrigInstBase base;
- BrigSegment8_t segment;
- uint8_t reserved[3];
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ uint8_t reserved[3];
};
struct BrigInstAtomic {
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigMemoryOrder8_t memoryOrder;
- BrigMemoryScope8_t memoryScope;
- BrigAtomicOperation8_t atomicOperation;
- uint8_t equivClass;
- uint8_t reserved[3];
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ BrigMemoryOrder8_t memoryOrder;
+ BrigMemoryScope8_t memoryScope;
+ BrigAtomicOperation8_t atomicOperation;
+ uint8_t equivClass;
+ uint8_t reserved[3];
};
struct BrigInstBasic {
- BrigInstBase base;
+ BrigInstBase base;
};
struct BrigInstBr {
- BrigInstBase base;
- BrigWidth8_t width;
- uint8_t reserved[3];
+ BrigInstBase base;
+ BrigWidth8_t width;
+ uint8_t reserved[3];
};
struct BrigInstCmp {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigAluModifier16_t modifier;
- BrigCompareOperation8_t compare;
- BrigPack8_t pack;
- uint16_t reserved;
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigAluModifier modifier;
+ BrigCompareOperation8_t compare;
+ BrigPack8_t pack;
+ uint8_t reserved[3];
};
struct BrigInstCvt {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigAluModifier16_t modifier;
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigAluModifier modifier;
+ BrigRound8_t round;
};
struct BrigInstImage {
- BrigInstBase base;
- BrigType16_t imageType;
- BrigType16_t coordType;
- BrigImageGeometry8_t geometry;
- uint8_t equivClass;
- uint16_t reserved;
+ BrigInstBase base;
+ BrigType16_t imageType;
+ BrigType16_t coordType;
+ BrigImageGeometry8_t geometry;
+ uint8_t equivClass;
+ uint16_t reserved;
};
struct BrigInstLane {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigWidth8_t width;
- uint8_t reserved;
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigWidth8_t width;
+ uint8_t reserved;
};
struct BrigInstMem {
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigAlignment8_t align;
- uint8_t equivClass;
- BrigWidth8_t width;
- BrigMemoryModifier8_t modifier;
- uint8_t reserved[3];
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ BrigAlignment8_t align;
+ uint8_t equivClass;
+ BrigWidth8_t width;
+ BrigMemoryModifier modifier;
+ uint8_t reserved[3];
};
struct BrigInstMemFence {
- BrigInstBase base;
- BrigMemoryOrder8_t memoryOrder;
- BrigMemoryScope8_t globalSegmentMemoryScope;
- BrigMemoryScope8_t groupSegmentMemoryScope;
- BrigMemoryScope8_t imageSegmentMemoryScope;
+ BrigInstBase base;
+ BrigMemoryOrder8_t memoryOrder;
+ BrigMemoryScope8_t globalSegmentMemoryScope;
+ BrigMemoryScope8_t groupSegmentMemoryScope;
+ BrigMemoryScope8_t imageSegmentMemoryScope;
};
struct BrigInstMod {
- BrigInstBase base;
- BrigAluModifier16_t modifier;
- BrigPack8_t pack;
- uint8_t reserved;
+ BrigInstBase base;
+ BrigAluModifier modifier;
+ BrigRound8_t round;
+ BrigPack8_t pack;
+ uint8_t reserved;
};
struct BrigInstQueryImage {
- BrigInstBase base;
- BrigType16_t imageType;
- BrigImageGeometry8_t geometry;
- BrigImageQuery8_t query;
+ BrigInstBase base;
+ BrigType16_t imageType;
+ BrigImageGeometry8_t geometry;
+ BrigImageQuery8_t imageQuery;
};
struct BrigInstQuerySampler {
- BrigInstBase base;
- BrigSamplerQuery8_t query;
- uint8_t reserved[3];
+ BrigInstBase base;
+ BrigSamplerQuery8_t samplerQuery;
+ uint8_t reserved[3];
};
struct BrigInstQueue {
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigMemoryOrder8_t memoryOrder;
- uint16_t reserved;
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ BrigMemoryOrder8_t memoryOrder;
+ uint16_t reserved;
};
struct BrigInstSeg {
- BrigInstBase base;
- BrigSegment8_t segment;
- uint8_t reserved[3];
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ uint8_t reserved[3];
};
struct BrigInstSegCvt {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigSegment8_t segment;
- BrigSegCvtModifier8_t modifier;
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigSegment8_t segment;
+ BrigSegCvtModifier modifier;
};
struct BrigInstSignal {
- BrigInstBase base;
- BrigType16_t signalType;
- BrigMemoryOrder8_t memoryOrder;
- BrigAtomicOperation8_t signalOperation;
+ BrigInstBase base;
+ BrigType16_t signalType;
+ BrigMemoryOrder8_t memoryOrder;
+ BrigAtomicOperation8_t signalOperation;
};
struct BrigInstSourceType {
- BrigInstBase base;
- BrigType16_t sourceType;
- uint16_t reserved;
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ uint16_t reserved;
};
struct BrigOperandAddress {
- BrigBase base;
- BrigCodeOffset32_t symbol;
- BrigOperandOffset32_t reg;
- BrigUInt64 offset;
+ BrigBase base;
+ BrigCodeOffset32_t symbol;
+ BrigOperandOffset32_t reg;
+ BrigUInt64 offset;
+};
+
+struct BrigOperandAlign {
+ BrigBase base;
+ BrigAlignment8_t align;
+ uint8_t reserved[3];
};
struct BrigOperandCodeList {
- BrigBase base;
- BrigDataOffsetCodeList32_t elements;
+ BrigBase base;
+ BrigDataOffsetCodeList32_t elements;
+
};
struct BrigOperandCodeRef {
- BrigBase base;
- BrigCodeOffset32_t ref;
+ BrigBase base;
+ BrigCodeOffset32_t ref;
+};
+
+struct BrigOperandConstantBytes {
+ BrigBase base;
+ BrigType16_t type;
+ uint16_t reserved;
+ BrigDataOffsetString32_t bytes;
};
-struct BrigOperandData {
- BrigBase base;
- BrigDataOffset32_t data;
+struct BrigOperandConstantOperandList {
+ BrigBase base;
+ BrigType16_t type;
+ uint16_t reserved;
+ BrigDataOffsetOperandList32_t elements;
+
};
-struct BrigOperandImageProperties {
- BrigBase base;
- uint64_t width;
- uint64_t height;
- uint64_t depth;
- uint64_t array;
- BrigImageGeometry8_t geometry;
- BrigImageChannelOrder8_t channelOrder;
- BrigImageChannelType8_t channelType;
- uint8_t reserved;
+struct BrigOperandConstantImage {
+ BrigBase base;
+ BrigType16_t type;
+ BrigImageGeometry8_t geometry;
+ BrigImageChannelOrder8_t channelOrder;
+ BrigImageChannelType8_t channelType;
+ uint8_t reserved[3];
+ BrigUInt64 width;
+ BrigUInt64 height;
+ BrigUInt64 depth;
+ BrigUInt64 array;
};
struct BrigOperandOperandList {
- BrigBase base;
- BrigDataOffsetOperandList32_t elements;
+ BrigBase base;
+ BrigDataOffsetOperandList32_t elements;
+
};
-struct BrigOperandReg {
- BrigBase base;
- BrigRegisterKind16_t regKind;
- uint16_t regNum;
+struct BrigOperandRegister {
+ BrigBase base;
+ BrigRegisterKind16_t regKind;
+ uint16_t regNum;
};
-struct BrigOperandSamplerProperties {
- BrigBase base;
- BrigSamplerCoordNormalization8_t coord;
- BrigSamplerFilter8_t filter;
- BrigSamplerAddressing8_t addressing;
- uint8_t reserved;
+struct BrigOperandConstantSampler {
+ BrigBase base;
+ BrigType16_t type;
+ BrigSamplerCoordNormalization8_t coord;
+ BrigSamplerFilter8_t filter;
+ BrigSamplerAddressing8_t addressing;
+ uint8_t reserved[3];
};
struct BrigOperandString {
- BrigBase base;
- BrigDataOffsetString32_t string;
+ BrigBase base;
+ BrigDataOffsetString32_t string;
};
struct BrigOperandWavesize {
- BrigBase base;
+ BrigBase base;
};
-struct BrigModule {
- uint32_t sectionCount;
- BrigSectionHeader* section[1];
+enum BrigExceptionsMask {
+ BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0,
+ BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1,
+ BRIG_EXCEPTIONS_OVERFLOW = 1 << 2,
+ BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3,
+ BRIG_EXCEPTIONS_INEXACT = 1 << 4,
+
+ BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16
};
+struct BrigSectionHeader {
+ uint64_t byteCount;
+ uint32_t headerByteCount;
+ uint32_t nameLength;
+ uint8_t name[1];
+};
+
+#define MODULE_IDENTIFICATION_LENGTH (8)
+
+struct BrigModuleHeader {
+ char identification[MODULE_IDENTIFICATION_LENGTH];
+ BrigVersion32_t brigMajor;
+ BrigVersion32_t brigMinor;
+ uint64_t byteCount;
+ uint8_t hash[64];
+ uint32_t reserved;
+ uint32_t sectionCount;
+ uint64_t sectionIndex;
+};
+
+typedef BrigModuleHeader* BrigModule_t;
+
#endif /* HSA_BRIG_FORMAT_H */
@@ -25,19 +25,29 @@ along with GCC; see the file COPYING3. If not see
#include "hard-reg-set.h"
#include "hsa.h"
#include "tree.h"
+#include "tree-iterator.h"
#include "stor-layout.h"
#include "tree-cfg.h"
+#include "tree-ssa-alias.h"
#include "machmode.h"
#include "output.h"
+#include "gimple-expr.h"
#include "dominance.h"
#include "cfg.h"
#include "function.h"
+#include "gimple.h"
#include "basic-block.h"
#include "vec.h"
+#include "stringpool.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "hash-map.h"
+#include "ipa-ref.h"
+#include "lto-streamer.h"
+#include "cgraph.h"
+#define BRIG_ELF_SECTION_NAME ".brig"
+#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME "hsa_data"
#define BRIG_SECTION_CODE_NAME "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"
@@ -67,7 +77,11 @@ public:
/* Section name that will be output to the BRIG. */
const char *section_name;
/* Size in bytes of all data stored in the section. */
- unsigned total_size, header_byte_count;
+ unsigned total_size;
+ /* The size of the header of the section including padding. */
+ unsigned header_byte_count;
+ /* The size of the header of the section without any padding. */
+ unsigned header_byte_delta;
/* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
vec <struct hsa_brig_data_chunk> chunks;
@@ -123,15 +137,15 @@ hsa_brig_section::allocate_new_chunk ()
void
hsa_brig_section::init (const char *name)
{
- struct BrigSectionHeader sample;
-
section_name = name;
- total_size = sizeof(sample.byteCount) + sizeof(sample.headerByteCount)
- + sizeof(sample.nameLength);
- /* Add strlen + null termination to the section size*/
- total_size = total_size + strlen(section_name) + 1;
+ /* While the following computation is basically wrong, because the intent
+ certainly wasn't to have the first character of name and padding, which
+ are a part of sizeof (BrigSectionHeader), included in the first addend,
+ this is what the disassembler expects. */
+ total_size = sizeof (BrigSectionHeader) + strlen(section_name);
chunks.create (1);
allocate_new_chunk ();
+ header_byte_delta = total_size;
round_size_up (4);
header_byte_count = total_size;
}
@@ -154,14 +168,17 @@ void
hsa_brig_section::output ()
{
struct BrigSectionHeader section_header;
+ char padding[8];
- switch_to_section (get_section (section_name, SECTION_NOTYPE, NULL));
-
- section_header.byteCount = htole32 (total_size);
- section_header.nameLength = htole32 (strlen(section_name));
+ section_header.byteCount = htole64 (total_size);
section_header.headerByteCount = htole32 (header_byte_count);
- assemble_string ((const char*) &section_header, 12);
- assemble_string (section_name, (section_header.nameLength + 1));
+ section_header.nameLength = htole32 (strlen(section_name));
+ assemble_string ((const char*) &section_header, 16);
+ assemble_string (section_name, strlen (section_name)); /* host-order length */
+ memset (&padding, 0, sizeof (padding));
+ /* This is also a consequence of the wrong header size computation described
+ in a comment in hsa_brig_section::init. */
+ assemble_string (padding, 8);
for (unsigned i = 0; i < chunks.length (); i++)
assemble_string (chunks[i].data, chunks[i].size);
}
@@ -214,7 +231,7 @@ hsa_brig_section::get_ptr_by_offset (unsigned int offset)
{
gcc_assert (offset < total_size);
- offset -= header_byte_count;
+ offset -= header_byte_delta;
unsigned int i;
for (i = 0; offset >= chunks[i].size; i++)
@@ -223,7 +240,6 @@ hsa_brig_section::get_ptr_by_offset (unsigned int offset)
return chunks[i].data + offset;
}
-
/* BRIG string data hashing. */
struct brig_string_slot
@@ -356,7 +372,6 @@ static struct operand_queue
static void
brig_init (void)
{
- struct BrigDirectiveVersion verdir;
brig_insn_count = 0;
if (brig_initialized)
@@ -366,21 +381,37 @@ brig_init (void)
brig_data.init (BRIG_SECTION_DATA_NAME);
brig_code.init (BRIG_SECTION_CODE_NAME);
brig_operand.init (BRIG_SECTION_OPERAND_NAME);
+ brig_initialized = true;
- verdir.base.byteCount = htole16 (sizeof (verdir));
- verdir.base.kind = htole16 (BRIG_KIND_DIRECTIVE_VERSION);
- verdir.hsailMajor = htole32 (BRIG_VERSION_HSAIL_MAJOR) ;
- verdir.hsailMinor = htole32 (BRIG_VERSION_HSAIL_MINOR);
- verdir.brigMajor = htole32 (BRIG_VERSION_BRIG_MAJOR);
- verdir.brigMinor = htole32 (BRIG_VERSION_BRIG_MINOR);
- verdir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
+ struct BrigDirectiveModule moddir;
+ memset (&moddir, 0, sizeof (moddir));
+ moddir.base.byteCount = htole16 (sizeof (moddir));
+
+ char *modname;
+ if (!in_lto_p && main_input_filename)
+ {
+ const char *part = strrchr (main_input_filename, '/');
+ /* Use the basename: step past the last '/', if there is one. */
+ part = part ? part + 1 : main_input_filename;
+ asprintf (&modname, "&%s", part);
+ char* extension = strchr (modname, '.');
+ if (extension)
+ *extension = '\0';
+ moddir.name = brig_emit_string (modname);
+ free (modname);
+ }
+ else
+ moddir.name = brig_emit_string (main_input_filename);
+ moddir.base.kind = htole16 (BRIG_KIND_DIRECTIVE_MODULE);
+ moddir.hsailMajor = htole32 (BRIG_VERSION_HSAIL_MAJOR) ;
+ moddir.hsailMinor = htole32 (BRIG_VERSION_HSAIL_MINOR);
+ moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
if (hsa_machine_large_p ())
- verdir.machineModel = BRIG_MACHINE_LARGE;
+ moddir.machineModel = BRIG_MACHINE_LARGE;
else
- verdir.machineModel = BRIG_MACHINE_SMALL;
- verdir.reserved = 0;
- brig_code.add (&verdir, sizeof (verdir));
- brig_initialized = true;
+ moddir.machineModel = BRIG_MACHINE_SMALL;
+ moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
+ brig_code.add (&moddir, sizeof (moddir));
}
/* Free all BRIG data. */
@@ -433,6 +464,7 @@ emit_directive_variable (struct hsa_symbol *symbol)
if (symbol->directive_offset)
return symbol->directive_offset;
+ memset (&dirvar, 0, sizeof (dirvar));
dirvar.base.byteCount = htole16 (sizeof (dirvar));
dirvar.base.kind = htole16 (BRIG_KIND_DIRECTIVE_VARIABLE);
dirvar.allocation = BRIG_ALLOCATION_AUTOMATIC;
@@ -472,7 +504,7 @@ emit_directive_variable (struct hsa_symbol *symbol)
dirvar.linkage = symbol->linkage;
dirvar.dim.lo = htole32 (symbol->dimLo);
dirvar.dim.hi = htole32 (symbol->dimHi);
- dirvar.modifier = BRIG_SYMBOL_DEFINITION;
+ dirvar.modifier.allBits |= BRIG_VARIABLE_DEFINITION;
dirvar.reserved = 0;
symbol->directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
@@ -506,8 +538,10 @@ emit_function_directives (void)
next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
+ memset (&fndir, 0, sizeof (fndir));
fndir.base.byteCount = htole16 (sizeof (fndir));
- fndir.base.kind = htole16 (hsa_cfun.kern_p ? BRIG_KIND_DIRECTIVE_KERNEL : BRIG_KIND_DIRECTIVE_FUNCTION);
+ fndir.base.kind = htole16 (hsa_cfun.kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
+ : BRIG_KIND_DIRECTIVE_FUNCTION);
fndir.name = htole32 (name_offset);
fndir.inArgCount = htole16 (hsa_cfun.input_args_count);
fndir.outArgCount = htole16 (hsa_cfun.output_arg ? 1 : 0);
@@ -515,8 +549,7 @@ emit_function_directives (void)
fndir.firstCodeBlockEntry = htole32 (scoped_off);
fndir.nextModuleEntry = htole32 (next_toplev_off);
fndir.linkage = BRIG_LINKAGE_PROGRAM;
- fndir.codeBlockEntryCount = htole32 (0);
- fndir.modifier = BRIG_EXECUTABLE_DEFINITION;
+ fndir.modifier.allBits |= BRIG_EXECUTABLE_DEFINITION;
memset (&fndir.reserved, 0, sizeof (fndir.reserved));
function_offsets.put (cfun->decl, brig_code.total_size);
@@ -721,9 +754,9 @@ enqueue_op (hsa_op_base *op)
op_queue.last_op = op;
if (is_a <hsa_op_immed *> (op))
- op_queue.projected_size += sizeof (struct BrigOperandData);
+ op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
else if (is_a <hsa_op_reg *> (op))
- op_queue.projected_size += sizeof (struct BrigOperandReg);
+ op_queue.projected_size += sizeof (struct BrigOperandRegister);
else if (is_a <hsa_op_address *> (op))
{
op_queue.projected_size += sizeof (struct BrigOperandAddress);
@@ -742,7 +775,7 @@ enqueue_op (hsa_op_base *op)
static void
emit_immediate_operand (hsa_op_immed *imm)
{
- struct BrigOperandData out;
+ struct BrigOperandConstantBytes out;
uint32_t byteCount;
union
@@ -754,6 +787,7 @@ emit_immediate_operand (hsa_op_immed *imm)
} bytes;
unsigned len;
+ memset (&out, 0, sizeof (out));
switch (imm->type)
{
case BRIG_TYPE_U8:
@@ -841,10 +875,10 @@ emit_immediate_operand (hsa_op_immed *imm)
}
out.base.byteCount = htole16 (sizeof (out));
- out.base.kind = htole16 (BRIG_KIND_OPERAND_DATA);
- byteCount = len ;
-
- out.data = brig_data.add (&byteCount, sizeof (byteCount));
+ out.base.kind = htole16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
+ byteCount = htole32 (len);
+ out.type = htole16 (imm->type);
+ out.bytes = htole32 (brig_data.add (&byteCount, sizeof (byteCount)));
brig_data.add (&bytes, len);
brig_operand.add (&out, sizeof(out));
@@ -856,20 +890,20 @@ emit_immediate_operand (hsa_op_immed *imm)
static void
emit_register_operand (hsa_op_reg *reg)
{
- struct BrigOperandReg out;
+ struct BrigOperandRegister out;
out.base.byteCount = htole16 (sizeof (out));
- out.base.kind = htole16 (BRIG_KIND_OPERAND_REG);
+ out.base.kind = htole16 (BRIG_KIND_OPERAND_REGISTER);
out.regNum = htole32 (reg->hard_num);
if (BRIG_TYPE_B32 == regtype_for_type (reg->type))
- out.regKind = BRIG_REGISTER_SINGLE;
+ out.regKind = BRIG_REGISTER_KIND_SINGLE;
else if (BRIG_TYPE_B64 == regtype_for_type (reg->type))
- out.regKind = BRIG_REGISTER_DOUBLE;
+ out.regKind = BRIG_REGISTER_KIND_DOUBLE;
else if (BRIG_TYPE_B128 == regtype_for_type (reg->type))
- out.regKind = BRIG_REGISTER_QUAD;
+ out.regKind = BRIG_REGISTER_KIND_QUAD;
else if (BRIG_TYPE_B1 == regtype_for_type (reg->type))
- out.regKind = BRIG_REGISTER_CONTROL;
+ out.regKind = BRIG_REGISTER_KIND_CONTROL;
else
gcc_unreachable ();
@@ -997,7 +1031,7 @@ emit_memory_insn (hsa_insn_mem *mem)
repr.segment = addr->symbol->segment;
else
repr.segment = BRIG_SEGMENT_FLAT;
- repr.modifier = 0 ;
+ repr.modifier.allBits = 0 ;
repr.equivClass = mem->equiv_class;
repr.align = BRIG_ALIGNMENT_1;
if (mem->opcode == BRIG_OPCODE_LD)
@@ -1119,7 +1153,7 @@ emit_segment_insn (hsa_insn_seg *seg)
repr.sourceType = htole16 (as_a <hsa_op_reg *> (seg->operands[1])->type);
repr.segment = seg->segment;
- repr.modifier = 0;
+ repr.modifier.allBits = 0;
brig_code.add (&repr, sizeof (repr));
@@ -1136,6 +1170,7 @@ emit_cmp_insn (hsa_insn_cmp *cmp)
BrigOperandOffset32_t operand_offsets[3];
uint32_t byteCount;
+ memset (&repr, 0, sizeof (repr));
repr.base.base.byteCount = htole16 (sizeof (repr));
repr.base.base.kind = htole16 (BRIG_KIND_INST_CMP);
repr.base.opcode = htole16 (cmp->opcode);
@@ -1155,10 +1190,9 @@ emit_cmp_insn (hsa_insn_cmp *cmp)
repr.sourceType = htole16 (as_a <hsa_op_reg *> (cmp->operands[1])->type);
else
repr.sourceType = htole16 (as_a <hsa_op_immed *> (cmp->operands[1])->type);
- repr.modifier = 0;
+ repr.modifier.allBits = 0;
repr.compare = cmp->compare;
repr.pack = 0;
- repr.reserved = 0;
brig_code.add (&repr, sizeof (repr));
brig_insn_count++;
@@ -1261,17 +1295,16 @@ emit_cvt_insn (hsa_insn_basic *insn)
else
srctype = as_a <hsa_op_immed *> (insn->operands[1])->type;
repr.sourceType = htole16 (srctype);
-
+ repr.modifier.allBits = 0;
/* float to smaller float requires a rounding setting (we default
to 'near'. */
if (float_type_p (insn->type)
&& (!float_type_p (srctype)
|| ((insn->type & BRIG_TYPE_BASE_MASK)
< (srctype & BRIG_TYPE_BASE_MASK))))
- repr.modifier = BRIG_ROUND_FLOAT_NEAR_EVEN;
+ repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
else
- repr.modifier = 0;
-
+ repr.round = BRIG_ROUND_NONE;
brig_code.add (&repr, sizeof (repr));
brig_insn_count++;
}
@@ -1281,15 +1314,22 @@ emit_cvt_insn (hsa_insn_basic *insn)
static void
emit_arg_block (bool is_start)
{
- struct BrigDirectiveArgBlock repr;
- repr.base.byteCount = htole16 (sizeof (repr));
-
- BrigKinds16_t kind = is_start ? BRIG_KIND_DIRECTIVE_ARG_BLOCK_START
- : BRIG_KIND_DIRECTIVE_ARG_BLOCK_END;
- repr.base.kind = htole16 (kind);
-
- brig_code.add (&repr, sizeof (repr));
- brig_insn_count++;
+ if (is_start)
+ {
+ struct BrigDirectiveArgBlockStart repr;
+ repr.base.byteCount = htole16 (sizeof (repr));
+ repr.base.kind = htole16 (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START);
+ brig_code.add (&repr, sizeof (repr));
+ brig_insn_count++;
+ }
+ else
+ {
+ struct BrigDirectiveArgBlockEnd repr;
+ repr.base.byteCount = htole16 (sizeof (repr));
+ repr.base.kind = htole16 (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END);
+ brig_code.add (&repr, sizeof (repr));
+ brig_insn_count++;
+ }
}
/* Emit call instruction INSN, where this instruction must be closed
@@ -1390,14 +1430,12 @@ emit_basic_insn (hsa_insn_basic *insn)
return;
}
+ memset (&repr, 0, sizeof (repr));
repr.base.base.byteCount = htole16 (sizeof (BrigInstBasic));
repr.base.base.kind = htole16 (BRIG_KIND_INST_BASIC);
repr.base.opcode = htole16 (insn->opcode);
switch (insn->opcode)
{
- /* XXX The spec says mov can take all types. But the LLVM based
- simulator cries about "Mov_s32" not being defined. */
- case BRIG_OPCODE_MOV:
/* And the bit-logical operations need bit types and whine about
arithmetic types :-/ */
case BRIG_OPCODE_AND:
@@ -1430,9 +1468,9 @@ emit_basic_insn (hsa_insn_basic *insn)
if ((type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE)
{
if (float_type_p (type))
- repr.modifier = BRIG_ROUND_FLOAT_NEAR_EVEN;
+ repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
else
- repr.modifier = 0;
+ repr.round = 0;
/* We assume that destination and sources agree in packing
layout. */
if (insn->operands[2])
@@ -1578,12 +1616,173 @@ hsa_brig_emit_function (void)
prev_bb = bb;
}
perhaps_emit_branch (prev_bb, NULL);
- ptr_to_fndir->codeBlockEntryCount = brig_insn_count ;
ptr_to_fndir->nextModuleEntry = brig_code.total_size;
emit_queued_operands ();
}
+static GTY(()) tree hsa_ctor_statements;
+
+/* Create a static initializer that will register our BRIG module BRIG_DECL
+ and its kernel names with libgomp via __hsa_register_image. */
+
+static void
+hsa_output_kernel_mapping (tree brig_decl)
+{
+ unsigned map_count = hsa_get_number_decl_kernel_mappings ();
+ gcc_assert (map_count > 0);
+ /* Array type for the table of host function addresses. */
+ tree int_num_of_kernels;
+ int_num_of_kernels = build_int_cst (integer_type_node, (int) map_count);
+ tree kernel_num_index_type = build_index_type (int_num_of_kernels);
+ tree host_functions_array_type = build_array_type (ptr_type_node,
+ kernel_num_index_type);
+ /* Collect the address of each mapped host function decl. */
+ vec<constructor_elt, va_gc> *host_functions_vec = NULL;
+ for (unsigned i = 0; i < map_count; ++i)
+ {
+ tree decl = hsa_get_decl_kernel_mapping_decl (i);
+ CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE,
+ build_fold_addr_expr (decl));
+ }
+ tree host_functions_ctor = build_constructor (host_functions_array_type,
+ host_functions_vec);
+ char tmp_name[64];
+ ASM_GENERATE_INTERNAL_LABEL (tmp_name, "hsa_host_functions", 1);
+ tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+ get_identifier (tmp_name),
+ host_functions_array_type);
+ TREE_STATIC (hsa_host_func_table) = 1;
+ TREE_READONLY (hsa_host_func_table) = 1;
+ TREE_PUBLIC (hsa_host_func_table) = 0;
+ DECL_ARTIFICIAL (hsa_host_func_table) = 1;
+ DECL_IGNORED_P (hsa_host_func_table) = 1;
+ DECL_EXTERNAL (hsa_host_func_table) = 0;
+ TREE_CONSTANT (hsa_host_func_table) = 1;
+ DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
+ varpool_node::finalize_decl (hsa_host_func_table);
+ /* Compute the size of the buffer that will hold all kernel names. */
+ int len = 0;
+ for (unsigned i = 0; i < map_count; ++i)
+ {
+ char *name = hsa_get_decl_kernel_mapping_name (i);
+ /* We add 1 for the terminating zero and 1 for an ampersand prefix. */
+ len = len + strlen (name) + 2;
+ }
+ len++;
+ /* Concatenate the names, each prefixed with '&' and zero-terminated. */
+ char *buf = XNEWVEC (char, len);
+ char *p = buf;
+ for (unsigned i = 0; i < map_count; ++i)
+ {
+ char *name = hsa_get_decl_kernel_mapping_name (i);
+ int ll = strlen (name);
+ gcc_assert (ll > 0);
+ *p = '&';
+ p++;
+ memcpy (p, name, ll);
+ p += ll;
+ *p = '\0';
+ p++;
+ }
+ *p = '\0';
+ tree kern_names = build_string (len, buf);
+ TREE_TYPE (kern_names) = build_array_type (char_type_node,
+ build_index_type (size_int (len)));
+ free (buf);
+ /* Image descriptor record with fields brig_module and kern_names. */
+ tree hsa_image_desc_type = make_node (RECORD_TYPE);
+ tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("brig_module"), ptr_type_node);
+ DECL_CHAIN (id_f1) = NULL_TREE;
+ tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("kern_names"), ptr_type_node);
+ DECL_CHAIN (id_f2) = id_f1;
+ finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f2,
+ NULL_TREE);
+ /* Initializer: address of BRIG_DECL, then address of the names string. */
+ vec<constructor_elt, va_gc> *img_desc_vec = NULL;
+ CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
+ build_fold_addr_expr (brig_decl));
+ CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
+ build1 (ADDR_EXPR,
+ build_pointer_type (TREE_TYPE (kern_names)),
+ kern_names));
+
+ tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
+ /* Emit the descriptor as a non-public, read-only static variable. */
+ ASM_GENERATE_INTERNAL_LABEL (tmp_name, "hsa_img_descriptor", 1);
+ tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+ get_identifier (tmp_name),
+ hsa_image_desc_type);
+ TREE_STATIC (hsa_img_descriptor) = 1;
+ TREE_READONLY (hsa_img_descriptor) = 1;
+ TREE_PUBLIC (hsa_img_descriptor) = 0;
+ DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
+ DECL_IGNORED_P (hsa_img_descriptor) = 1;
+ DECL_EXTERNAL (hsa_img_descriptor) = 0;
+ TREE_CONSTANT (hsa_img_descriptor) = 1;
+ DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
+ varpool_node::finalize_decl (hsa_img_descriptor);
+
+ /* Construct the "host_table" libgomp expects. */
+ tree libgomp_host_table_type = build_array_type (ptr_type_node,
+ build_index_type
+ (build_int_cst
+ (integer_type_node, 4)));
+ vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
+ tree host_func_table_addr = build_fold_addr_expr (hsa_host_func_table);
+ CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
+ host_func_table_addr);
+ offset_int func_table_size = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node))
+ * map_count;
+ CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
+ fold_build2 (POINTER_PLUS_EXPR,
+ TREE_TYPE (host_func_table_addr),
+ host_func_table_addr,
+ build_int_cst (size_type_node,
+ func_table_size.to_uhwi
+ ())));
+ CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
+ CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
+ tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
+ libgomp_host_table_vec);
+ ASM_GENERATE_INTERNAL_LABEL (tmp_name, "hsa_libgomp_host_table", 1);
+ tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+ get_identifier (tmp_name),
+ libgomp_host_table_type);
+
+ TREE_STATIC (hsa_libgomp_host_table) = 1;
+ TREE_READONLY (hsa_libgomp_host_table) = 1;
+ TREE_PUBLIC (hsa_libgomp_host_table) = 0;
+ DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
+ DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
+ DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
+ TREE_CONSTANT (hsa_libgomp_host_table) = 1;
+ DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
+ varpool_node::finalize_decl (hsa_libgomp_host_table);
+
+ /* Generate an initializer with a call to the registration routine. */
+
+ /* FIXME: gomp_offload_register has one more enum parameter omitted here. */
+ tree reg_fn_type = build_function_type_list (void_type_node, ptr_type_node,
+ ptr_type_node, NULL_TREE);
+ tree reg_fn = build_fn_decl ("__hsa_register_image", reg_fn_type);
+ append_to_statement_list
+ (build_call_expr (reg_fn, 2,
+ build_fold_addr_expr (hsa_libgomp_host_table),
+ build_fold_addr_expr (hsa_img_descriptor)),
+ &hsa_ctor_statements);
+ cgraph_build_static_cdtor ('I', hsa_ctor_statements, DEFAULT_INIT_PRIORITY);
+}
+
+
+#define HSA_SECTION_ALIGNMENT 16
+
+/* Emit the brig module we have compiled to a section in the final assembly and
+ also create a compile unit static constructor that will register the brig
+ module with libgomp. */
+
void
hsa_output_brig (void)
{
@@ -1613,13 +1812,74 @@ hsa_output_brig (void)
saved_section = in_section;
+ switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
+ char tmp_name[64];
+ ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
+ ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
+ tree brig_id = get_identifier (tmp_name);
+ tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
+ char_type_node);
+ SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
+ TREE_ADDRESSABLE (brig_decl) = 1;
+ TREE_READONLY (brig_decl) = 1;
+ DECL_ARTIFICIAL (brig_decl) = 1;
+ DECL_IGNORED_P (brig_decl) = 1;
+ TREE_STATIC (brig_decl) = 1;
+ TREE_PUBLIC (brig_decl) = 0;
+ TREE_USED (brig_decl) = 1;
+ DECL_INITIAL (brig_decl) = brig_decl;
+ TREE_ASM_WRITTEN (brig_decl) = 1;
+
+ BrigModuleHeader module_header;
+ memcpy (&module_header.identification, "HSA BRIG",
+ sizeof(module_header.identification));
+ module_header.brigMajor = htole32 (BRIG_VERSION_BRIG_MAJOR);
+ module_header.brigMinor = htole32 (BRIG_VERSION_BRIG_MINOR);
+ uint64_t section_index[3];
+
+ int data_padding, code_padding, operand_padding;
+ data_padding = HSA_SECTION_ALIGNMENT
+ - brig_data.total_size % HSA_SECTION_ALIGNMENT;
+ code_padding = HSA_SECTION_ALIGNMENT
+ - brig_code.total_size % HSA_SECTION_ALIGNMENT;
+ operand_padding = HSA_SECTION_ALIGNMENT
+ - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
+
+ uint64_t module_size = sizeof (module_header) + sizeof (section_index)
+ + brig_data.total_size + data_padding
+ + brig_code.total_size + code_padding
+ + brig_operand.total_size + operand_padding;
+ gcc_assert ((module_size % 16) == 0);
+ module_header.byteCount = htole64 (module_size);
+ memset (&module_header.hash, 0, sizeof (module_header.hash));
+ module_header.reserved = 0;
+ module_header.sectionCount = htole32 (3);
+ module_header.sectionIndex = htole64 (sizeof (module_header));
+ assemble_string ((const char *) &module_header, sizeof(module_header));
+ uint64_t off = sizeof (module_header) + sizeof (section_index);
+ section_index[0] = htole64 (off);
+ off += brig_data.total_size + data_padding;
+ section_index[1] = htole64 (off);
+ off += brig_code.total_size + code_padding;
+ section_index[2] = htole64 (off);
+ assemble_string ((const char *) &section_index, sizeof (section_index));
+
+ char padding[HSA_SECTION_ALIGNMENT];
+ memset (padding, 0, sizeof(padding));
+
brig_data.output ();
+ assemble_string (padding, data_padding);
brig_code.output ();
+ assemble_string (padding, code_padding);
brig_operand.output ();
+ assemble_string (padding, operand_padding);
if (saved_section)
switch_to_section (saved_section);
+ hsa_output_kernel_mapping (brig_decl);
+
+ hsa_free_decl_kernel_mapping ();
brig_release_data ();
hsa_deinit_compilation_unit_data ();
}
@@ -347,16 +347,12 @@ hsa_opcode_name (BrigOpcode16_t opcode)
return "activelaneid";
case BRIG_OPCODE_ACTIVELANEMASK:
return "activelanemask";
- case BRIG_OPCODE_ACTIVELANESHUFFLE:
- return "activelaneshuffle";
case BRIG_OPCODE_CALL:
return "call";
case BRIG_OPCODE_SCALL:
return "scall";
case BRIG_OPCODE_ICALL:
return "icall";
- case BRIG_OPCODE_LDI:
- return "ldi";
case BRIG_OPCODE_RET:
return "ret";
case BRIG_OPCODE_ALLOCA:
@@ -393,22 +389,12 @@ hsa_opcode_name (BrigOpcode16_t opcode)
return "setdetectexcept";
case BRIG_OPCODE_ADDQUEUEWRITEINDEX:
return "addqueuewriteindex";
- case BRIG_OPCODE_AGENTCOUNT:
- return "agentcount";
- case BRIG_OPCODE_AGENTID:
- return "agentid";
case BRIG_OPCODE_CASQUEUEWRITEINDEX:
return "casqueuewriteindex";
- case BRIG_OPCODE_LDK:
- return "ldk";
case BRIG_OPCODE_LDQUEUEREADINDEX:
return "ldqueuereadindex";
case BRIG_OPCODE_LDQUEUEWRITEINDEX:
return "ldqueuewriteindex";
- case BRIG_OPCODE_QUEUEID:
- return "queueid";
- case BRIG_OPCODE_QUEUEPTR:
- return "queueptr";
case BRIG_OPCODE_STQUEUEREADINDEX:
return "stqueuereadindex";
case BRIG_OPCODE_STQUEUEWRITEINDEX:
@@ -570,11 +556,11 @@ hsa_memscope_name (enum BrigMemoryScope scope)
case BRIG_MEMORY_SCOPE_WORKITEM:
return "wi";
case BRIG_MEMORY_SCOPE_WAVEFRONT:
- return "wv";
+ return "wave";
case BRIG_MEMORY_SCOPE_WORKGROUP:
return "wg";
- case BRIG_MEMORY_SCOPE_COMPONENT:
- return "cmp";
+ case BRIG_MEMORY_SCOPE_AGENT:
+ return "agent";
case BRIG_MEMORY_SCOPE_SYSTEM:
return "sys";
default:
@@ -66,6 +66,20 @@ along with GCC; see the file COPYING3. If not see
function. */
struct hsa_function_representation hsa_cfun;
+/* Element of the mapping vector between a host decl and an HSA kernel. */
+
+struct GTY(()) hsa_decl_kernel_map_element
+{
+ /* The decl of the host function. */
+ tree decl;
+ /* Name of the HSA kernel in BRIG; malloc'ed and released with free (). */
+ char * GTY((skip)) name;
+};
+
+/* Mapping between decls and corresponding HSA kernels in this compilation
+ unit. */
+static GTY (()) vec<hsa_decl_kernel_map_element, va_gc> *hsa_decl_kernel_mapping;
+
/* Alloc pools for allocating basic hsa structures such as operands,
instructions and other basic entitie.s */
static alloc_pool hsa_allocp_operand_address;
@@ -244,7 +258,10 @@ hsa_deinit_data_for_cfun (void)
delete hsa_cfun.local_symbols;
free (hsa_cfun.input_args);
free (hsa_cfun.output_arg);
- free (hsa_cfun.name);
+ /* Kernel names are deallocated at the end of BRIG output when deallocating
+ hsa_decl_kernel_mapping. */
+ if (!hsa_cfun.kern_p)
+ free (hsa_cfun.name);
hsa_cfun.spill_symbols.release();
}
@@ -580,7 +597,7 @@ hsa_alloc_immed_op (tree tree_val)
&& !POINTER_TYPE_P (TREE_TYPE (tree_val)));
memset (imm, 0 , sizeof (hsa_op_immed));
- imm->kind = BRIG_KIND_OPERAND_DATA;
+ imm->kind = BRIG_KIND_OPERAND_CONSTANT_BYTES;
imm->type = hsa_type_for_scalar_tree_type (TREE_TYPE (tree_val), true);
imm->value = tree_val;
@@ -612,7 +629,7 @@ hsa_alloc_reg_op (void)
hreg = (hsa_op_reg *) pool_alloc (hsa_allocp_operand_reg);
hsa_list_operand_reg.safe_push (hreg);
memset (hreg, 0, sizeof (hsa_op_reg));
- hreg->kind = BRIG_KIND_OPERAND_REG;
+ hreg->kind = BRIG_KIND_OPERAND_REGISTER;
/* TODO: Try removing later on. I suppose this is not necessary but I'd
rather avoid surprises. */
hreg->order = hsa_cfun.reg_count++;
@@ -2225,6 +2242,50 @@ gen_function_parameters (vec <hsa_op_reg_p> ssa_map)
}
}
+/* Record a mapping from the host function DECL to the kernel name NAME. */
+
+static void
+hsa_add_kern_decl_mapping (tree decl, char *name)
+{
+ /* NAME is stored as is, it is not copied. */
+ hsa_decl_kernel_map_element new_mapping = { decl, name };
+
+ vec_safe_push (hsa_decl_kernel_mapping, new_mapping);
+}
+
+/* Return the number of decl-to-kernel-name mappings currently recorded. */
+
+unsigned
+hsa_get_number_decl_kernel_mappings (void)
+{
+ return vec_safe_length (hsa_decl_kernel_mapping);
+}
+
+/* Return the host decl of the Ith mapping; I must be a valid index. */
+
+tree
+hsa_get_decl_kernel_mapping_decl (unsigned i)
+{
+ return (*hsa_decl_kernel_mapping)[i].decl;
+}
+
+/* Return the stored kernel name (not a copy) of the Ith mapping. */
+
+char *
+hsa_get_decl_kernel_mapping_name (unsigned i)
+{
+ return (*hsa_decl_kernel_mapping)[i].name;
+}
+
+/* Free the kernel names and the mapping vector, which is reset to NULL. */
+
+void
+hsa_free_decl_kernel_mapping (void)
+{
+ for (unsigned i = 0; i < vec_safe_length (hsa_decl_kernel_mapping); ++i)
+ free ((*hsa_decl_kernel_mapping)[i].name);
+ vec_free (hsa_decl_kernel_mapping);
+}
static void
sanitize_hsa_name (char *p)
@@ -2244,14 +2305,19 @@ generate_hsa (void)
hsa_init_data_for_cfun ();
- bool kern_p = lookup_attribute ("hsakernel",
- DECL_ATTRIBUTES (current_function_decl));
+ bool kern_p = lookup_attribute ("hsa",
+ DECL_ATTRIBUTES (current_function_decl))
+ || lookup_attribute ("hsakernel", DECL_ATTRIBUTES (current_function_decl));
hsa_cfun.kern_p = kern_p;
ssa_map.safe_grow_cleared (SSANAMES (cfun)->length ());
hsa_cfun.name
= xstrdup (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)));
sanitize_hsa_name (hsa_cfun.name);
+
+ if (kern_p)
+ hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun.name);
+
gen_function_parameters (ssa_map);
if (seen_error ())
goto fail;
@@ -2277,93 +2343,51 @@ generate_hsa (void)
}
static GTY(()) tree hsa_launch_fn;
-static GTY(()) tree hsa_kernel_desc_type;
static GTY(()) tree hsa_dim_array_type;
-static GTY(()) tree hsa_range_dimnum_decl;
-static GTY(()) tree hsa_range_grid_decl;
-static GTY(()) tree hsa_range_group_decl;
-static GTY(()) tree hsa_launch_range_type;
+static GTY(()) tree hsa_lattrs_dimnum_decl;
+static GTY(()) tree hsa_lattrs_grid_decl;
+static GTY(()) tree hsa_lattrs_group_decl;
+static GTY(()) tree hsa_lattrs_nargs_decl;
+static GTY(()) tree hsa_launch_attributes_type;
static void
init_hsa_functions (void)
{
- tree launch_fn_type;
- tree fields, f;
- tree constcharptr;
if (hsa_launch_fn)
return;
- constcharptr = build_pointer_type (build_qualified_type
- (char_type_node, TYPE_QUAL_CONST));
-
- hsa_kernel_desc_type = make_node (RECORD_TYPE);
- fields = NULL_TREE;
- f = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("filename"), constcharptr);
- DECL_CHAIN (f) = fields;
- fields = f;
- f = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("name"), constcharptr);
- DECL_CHAIN (f) = fields;
- fields = f;
- f = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("nargs"), uint64_type_node);
- DECL_CHAIN (f) = fields;
- fields = f;
- f = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("kernel"), ptr_type_node);
- DECL_CHAIN (f) = fields;
- fields = f;
- f = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("context"), ptr_type_node);
- DECL_CHAIN (f) = fields;
- fields = f;
-
- finish_builtin_struct (hsa_kernel_desc_type, "__hsa_kernel_desc",
- fields, NULL_TREE);
-
-
tree dim_arr_index_type;
dim_arr_index_type = build_index_type (build_int_cst (integer_type_node, 2));
hsa_dim_array_type = build_array_type (uint32_type_node, dim_arr_index_type);
- hsa_launch_range_type = make_node (RECORD_TYPE);
- fields = NULL_TREE;
- hsa_range_dimnum_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("dimension"),
- uint32_type_node);
- DECL_CHAIN (hsa_range_dimnum_decl) = NULL_TREE;
+ hsa_launch_attributes_type = make_node (RECORD_TYPE);
+ hsa_lattrs_dimnum_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("ndim"),
+ uint32_type_node);
+ DECL_CHAIN (hsa_lattrs_dimnum_decl) = NULL_TREE;
- hsa_range_grid_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ hsa_lattrs_grid_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
get_identifier ("global_size"),
hsa_dim_array_type);
- DECL_CHAIN (hsa_range_grid_decl) = hsa_range_dimnum_decl;
- hsa_range_group_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ DECL_CHAIN (hsa_lattrs_grid_decl) = hsa_lattrs_dimnum_decl;
+ hsa_lattrs_group_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
get_identifier ("group_size"),
hsa_dim_array_type);
- DECL_CHAIN (hsa_range_group_decl) = hsa_range_grid_decl;
- tree reserved = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("reserved"), uint32_type_node);
- DECL_CHAIN (reserved) = hsa_range_group_decl;
-
- /* This is in fact okra_range_s, but let's call everything HSA, at least for
- now. */
- finish_builtin_struct (hsa_launch_range_type, "__hsa_launch_range",
- reserved, NULL_TREE);
-
- /* __hsa_launch_kernel (__hsa_kernel_desc * kd, __hsa_launch_range* range,
- uint64_t *args) */
-
+ DECL_CHAIN (hsa_lattrs_group_decl) = hsa_lattrs_grid_decl;
+ hsa_lattrs_nargs_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("nargs"),
+ uint32_type_node);
+ DECL_CHAIN (hsa_lattrs_nargs_decl) = hsa_lattrs_group_decl;
+ finish_builtin_struct (hsa_launch_attributes_type, "__hsa_launch_attributes",
+ hsa_lattrs_nargs_decl, NULL_TREE);
+ tree launch_fn_type;
launch_fn_type
- = build_function_type_list (void_type_node,
- build_pointer_type (hsa_kernel_desc_type),
- build_pointer_type (hsa_launch_range_type),
+ = build_function_type_list (void_type_node, ptr_type_node,
+ build_pointer_type (hsa_launch_attributes_type),
build_pointer_type (uint64_type_node),
NULL_TREE);
- hsa_launch_fn
- = build_fn_decl ("__hsa_launch_kernel",
- launch_fn_type);
+ hsa_launch_fn = build_fn_decl ("__hsa_launch_kernel", launch_fn_type);
}
/* Insert before the current statement in GSI a store of VALUE to INDEX of
@@ -2382,12 +2406,110 @@ insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
}
+
+/* Replace the call to FNDECL at GSI with a call to __hsa_launch_kernel taking
+ the address of FNDECL, launch attributes and an array of the call arguments
+ converted to uint64_t. For hsakernel functions the last two call arguments
+ are the grid and group sizes and are not put into that array. */
+
+static void
+wrap_hsa_kernel_call (gimple_stmt_iterator *gsi, tree fndecl)
+{
+ init_hsa_functions ();
+
+ bool real_kern_p = lookup_attribute ("hsakernel", DECL_ATTRIBUTES (fndecl));
+ tree grid_size_1, group_size_1;
+ tree u32_one = build_int_cst (uint32_type_node, 1);
+ gimple call_stmt = gsi_stmt (*gsi);
+ unsigned discard_arguents, num_args = gimple_call_num_args (call_stmt);
+ if (real_kern_p)
+ {
+ discard_arguents = 2;
+ if (num_args < 2)
+ {
+ error ("Calls to functions with hsakernel attribute must "
+ "have at least two arguments.");
+ grid_size_1 = group_size_1 = u32_one;
+ /* Keep NUM_ARGS - DISCARD_ARGUENTS from wrapping around below. */
+ discard_arguents = num_args;
+ }
+ else
+ {
+ grid_size_1 = fold_convert (uint32_type_node,
+ gimple_call_arg (call_stmt, num_args - 2));
+ grid_size_1 = force_gimple_operand_gsi (gsi, grid_size_1, true,
+ NULL_TREE, true,
+ GSI_SAME_STMT);
+ group_size_1 = fold_convert (uint32_type_node,
+ gimple_call_arg (call_stmt,
+ num_args - 1));
+ group_size_1 = force_gimple_operand_gsi (gsi, group_size_1, true,
+ NULL_TREE, true,
+ GSI_SAME_STMT);
+ }
+ }
+ else
+ {
+ discard_arguents = 0;
+ grid_size_1 = build_int_cst (uint32_type_node, 64);
+ group_size_1 = build_int_cst (uint32_type_node, 16);
+ }
+ gcc_assert (num_args >= discard_arguents);
+ unsigned kernel_nargs = num_args - discard_arguents;
+
+ tree lattrs = create_tmp_var (hsa_launch_attributes_type,
+ "__hsa_launch_attrs");
+ tree dimref = build3 (COMPONENT_REF, uint32_type_node,
+ lattrs, hsa_lattrs_dimnum_decl, NULL_TREE);
+ gsi_insert_before (gsi, gimple_build_assign (dimref, u32_one), GSI_SAME_STMT);
+ insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 0,
+ grid_size_1);
+ insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 1,
+ u32_one);
+ insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 2,
+ u32_one);
+ insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 0,
+ group_size_1);
+ insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 1,
+ u32_one);
+ insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 2,
+ u32_one);
+ tree nargsref = build3 (COMPONENT_REF, uint32_type_node,
+ lattrs, hsa_lattrs_nargs_decl, NULL_TREE);
+ tree nargsval = build_int_cst (uint32_type_node, kernel_nargs);
+ gsi_insert_before (gsi, gimple_build_assign (nargsref, nargsval),
+ GSI_SAME_STMT);
+ lattrs = build_fold_addr_expr (lattrs);
+
+ tree args = create_tmp_var (build_array_type_nelts (uint64_type_node,
+ kernel_nargs), NULL);
+ for (unsigned i = 0; i < kernel_nargs; i++)
+ {
+ tree arg = gimple_call_arg (call_stmt, i);
+ tree r = build4 (ARRAY_REF, uint64_type_node, args,
+ size_int (i), NULL_TREE, NULL_TREE);
+ arg = force_gimple_operand_gsi (gsi, fold_convert (uint64_type_node, arg),
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ gsi_insert_before (gsi, gimple_build_assign (r, arg), GSI_SAME_STMT);
+ }
+ args = build_fold_addr_expr (args);
+
+ /* XXX doesn't handle calls with lhs, doesn't remove EH
+ edges. */
+ gimple launch = gimple_build_call (hsa_launch_fn, 3,
+ build_fold_addr_expr (fndecl),
+ lattrs, args);
+ gsi_insert_before (gsi, launch, GSI_SAME_STMT);
+ unlink_stmt_vdef (call_stmt);
+ gsi_remove (gsi, true);
+}
+
+
static unsigned int
-wrap_hsa (void)
+wrap_all_hsa_calls (void)
{
bool changed = false;
basic_block bb;
- init_hsa_functions ();
FOR_EACH_BB_FN (bb, cfun)
{
gimple_stmt_iterator gsi;
@@ -2398,169 +2520,7 @@ wrap_hsa (void)
&& (lookup_attribute ("hsa", DECL_ATTRIBUTES (fndecl))
|| lookup_attribute ("hsakernel", DECL_ATTRIBUTES (fndecl))))
{
- char *tmpname;
- gimple launch, call_stmt = gsi_stmt (gsi);
- vec<constructor_elt, va_gc> *v = NULL;
- tree str;
- str = build_string_literal (1, "");
- bool kern_p = lookup_attribute ("hsakernel",
- DECL_ATTRIBUTES (fndecl));
- hsa_cfun.kern_p = kern_p;
- if (!in_lto_p && main_input_filename)
- {
- char *filename;
- const char *part = strrchr (main_input_filename, '/');
- if (!part)
- part = main_input_filename;
- asprintf (&filename, "%s", part);
- char* extension = strchr (filename, '.');
- if (extension)
- {
- strcpy (extension, "\0");
- asprintf (&extension, "%s", ".o\0");
- strcat (filename, extension);
- free (extension);
- str = build_string_literal (strlen(filename)+1,filename);
- free (filename);
- }
- }
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, str);
-
-
- int slen = IDENTIFIER_LENGTH (DECL_ASSEMBLER_NAME (fndecl));
- if (asprintf (&tmpname, "&%s",
- IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (fndecl))) < 0)
- gcc_unreachable ();
- sanitize_hsa_name (tmpname + 1);
-
- str = build_string_literal (slen + 2, tmpname);
- free (tmpname);
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, str);
- int discard_arguents;
- int num_args = gimple_call_num_args (call_stmt);
- if (kern_p)
- discard_arguents = 2;
- else
- discard_arguents = 0;
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
- size_int (num_args - discard_arguents));
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, null_pointer_node);
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, null_pointer_node);
-
- tree desc_initval = build_constructor (hsa_kernel_desc_type, v);
-
- /* Create a new VAR_DECL of type descriptor. */
- char tmp_name[32];
- static unsigned int var_id;
- ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kd", var_id++);
- tree desc = build_decl (gimple_location (call_stmt), VAR_DECL,
- get_identifier (tmp_name),
- hsa_kernel_desc_type);
- TREE_STATIC (desc) = 1;
- TREE_PUBLIC (desc) = 0;
- DECL_ARTIFICIAL (desc) = 1;
- DECL_IGNORED_P (desc) = 1;
- DECL_EXTERNAL (desc) = 0;
-
- TREE_CONSTANT (desc_initval) = 1;
- TREE_STATIC (desc_initval) = 1;
- DECL_INITIAL (desc) = desc_initval;
- varpool_node::finalize_decl (desc);
- desc = build_fold_addr_expr (desc);
-
- tree grid_size_1, group_size_1;
- tree u32_one = build_int_cst (uint32_type_node, 1);
- if (kern_p)
- {
- discard_arguents = 2;
- int num_args = gimple_call_num_args (call_stmt);
- if (num_args < 2)
- {
- error ("Calls to functions with hsakernel attribute must "
- "have at least two arguments.");
- grid_size_1 = group_size_1 = u32_one;
- }
- else
- {
- grid_size_1 = fold_convert (uint32_type_node,
- gimple_call_arg (call_stmt,
- num_args - 2));
- grid_size_1 = force_gimple_operand_gsi (&gsi, grid_size_1,
- true, NULL_TREE,
- true,
- GSI_SAME_STMT);
- group_size_1 = fold_convert (uint32_type_node,
- gimple_call_arg (call_stmt,
- num_args
- - 1));
- group_size_1 = force_gimple_operand_gsi (&gsi, group_size_1,
- true, NULL_TREE,
- true,
- GSI_SAME_STMT);
- }
- }
- else
- {
- discard_arguents = 0;
- grid_size_1 = build_int_cst (uint32_type_node, 256);
- group_size_1 = build_int_cst (uint32_type_node, 16);
- }
-
-
- /* We fill in range dynamically because later on we'd like to
- decide about the values at run time. */
- tree range = create_tmp_var (hsa_launch_range_type, "__hsa_range");
- tree dimref = build3 (COMPONENT_REF, uint32_type_node,
- range, hsa_range_dimnum_decl, NULL_TREE);
- gsi_insert_before (&gsi,
- gimple_build_assign (dimref, u32_one),
- GSI_SAME_STMT);
- insert_store_range_dim (&gsi, range, hsa_range_grid_decl, 0,
- grid_size_1);
- insert_store_range_dim (&gsi, range, hsa_range_grid_decl, 1,
- u32_one);
- insert_store_range_dim (&gsi, range, hsa_range_grid_decl, 2,
- u32_one);
- insert_store_range_dim (&gsi, range, hsa_range_group_decl, 0,
- group_size_1);
- insert_store_range_dim (&gsi, range, hsa_range_group_decl, 1,
- u32_one);
- insert_store_range_dim (&gsi, range, hsa_range_group_decl, 2,
- u32_one);
- range = build_fold_addr_expr (range);
-
- tree args = create_tmp_var
- (build_array_type_nelts (uint64_type_node,
- gimple_call_num_args (call_stmt)),
- NULL);
-
- for (unsigned i = 0;
- i < gimple_call_num_args (call_stmt) - discard_arguents;
- i++)
- {
- tree arg = gimple_call_arg (call_stmt, i);
- gimple g;
-
- tree r = build4 (ARRAY_REF, uint64_type_node, args,
- size_int (i), NULL_TREE, NULL_TREE);
-
- arg = force_gimple_operand_gsi (&gsi,
- fold_convert (uint64_type_node,
- arg),
- true, NULL_TREE,
- true, GSI_SAME_STMT);
- g = gimple_build_assign (r, arg);
- gsi_insert_before (&gsi, g, GSI_SAME_STMT);
- }
-
- args = build_fold_addr_expr (args);
-
- /* XXX doesn't handle calls with lhs, doesn't remove EH
- edges. */
- launch = gimple_build_call (hsa_launch_fn, 3, desc, range, args);
- gsi_insert_before (&gsi, launch, GSI_SAME_STMT);
- unlink_stmt_vdef (call_stmt);
- gsi_remove (&gsi, true);
+ wrap_hsa_kernel_call (&gsi, fndecl);
changed = true;
}
else
@@ -2615,7 +2575,7 @@ pass_gen_hsail::execute (function *)
DECL_ATTRIBUTES (current_function_decl)))
return generate_hsa ();
else
- return wrap_hsa ();
+ return wrap_all_hsa_calls ();
}
} // anon namespace
@@ -292,7 +292,7 @@ hsa_num_def_ops (hsa_insn_basic *insn)
case BRIG_OPCODE_ACTIVELANECOUNT:
case BRIG_OPCODE_ACTIVELANEID:
case BRIG_OPCODE_ACTIVELANEMASK:
- case BRIG_OPCODE_ACTIVELANESHUFFLE:
+ case BRIG_OPCODE_ACTIVELANEPERMUTE:
return 1; /* ??? */
case BRIG_OPCODE_CALL:
@@ -300,9 +300,6 @@ hsa_num_def_ops (hsa_insn_basic *insn)
case BRIG_OPCODE_ICALL:
return 1; /* ??? */
- case BRIG_OPCODE_LDI:
- return 1; /* ??? */
-
case BRIG_OPCODE_RET:
return 0;
@@ -318,14 +315,9 @@ hsa_num_def_ops (hsa_insn_basic *insn)
case BRIG_OPCODE_PACKETCOMPLETIONSIG:
case BRIG_OPCODE_PACKETID:
case BRIG_OPCODE_ADDQUEUEWRITEINDEX:
- case BRIG_OPCODE_AGENTCOUNT:
- case BRIG_OPCODE_AGENTID:
case BRIG_OPCODE_CASQUEUEWRITEINDEX:
- case BRIG_OPCODE_LDK:
case BRIG_OPCODE_LDQUEUEREADINDEX:
case BRIG_OPCODE_LDQUEUEWRITEINDEX:
- case BRIG_OPCODE_QUEUEID:
- case BRIG_OPCODE_QUEUEPTR:
case BRIG_OPCODE_STQUEUEREADINDEX:
case BRIG_OPCODE_STQUEUEWRITEINDEX:
return 1; /* ??? */
@@ -77,7 +77,7 @@ struct hsa_op_base
unsigned brig_op_offset;
/* The type of a particular operand. */
- BrigKinds16_t kind;
+ BrigKind16_t kind;
};
/* Common abstract ancestor for operands which have a type. */
@@ -103,7 +103,7 @@ template <>
inline bool
is_a_helper <hsa_op_immed *>::test (hsa_op_base *p)
{
- return p->kind == BRIG_KIND_OPERAND_DATA;
+ return p->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES;
}
/* HSA register operand. */
@@ -152,7 +152,7 @@ template <>
inline bool
is_a_helper <hsa_op_reg *>::test (hsa_op_base *p)
{
- return p->kind == BRIG_KIND_OPERAND_REG;
+ return p->kind == BRIG_KIND_OPERAND_REGISTER;
}
/* An address HSA operand. */
@@ -617,6 +617,10 @@ hsa_op_reg *hsa_spill_in (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **);
hsa_op_reg *hsa_spill_out (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **);
hsa_bb *hsa_init_new_bb (basic_block);
void hsa_deinit_compilation_unit_data (void);
+unsigned hsa_get_number_decl_kernel_mappings (void);
+tree hsa_get_decl_kernel_mapping_decl (unsigned i);
+char *hsa_get_decl_kernel_mapping_name (unsigned i);
+void hsa_free_decl_kernel_mapping (void);
/* In hsa-regalloc.c. */
void hsa_regalloc (void);
@@ -60,7 +60,7 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
- time.c fortran.c affinity.c target.c hsaokra.c
+ time.c fortran.c affinity.c target.c hsa.c
nodist_noinst_HEADERS = libgomp_f.h
nodist_libsubinclude_HEADERS = omp.h
@@ -96,7 +96,7 @@ am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \
error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \
parallel.lo sections.lo single.lo task.lo team.lo work.lo \
lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \
- fortran.lo affinity.lo target.lo hsaokra.lo
+ fortran.lo affinity.lo target.lo hsa.lo
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/../depcomp
@@ -320,7 +320,7 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
- time.c fortran.c affinity.c target.c hsaokra.c
+ time.c fortran.c affinity.c target.c hsa.c
nodist_noinst_HEADERS = libgomp_f.h
nodist_libsubinclude_HEADERS = omp.h
@@ -464,7 +464,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hsaokra.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hsa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
new file mode 100644
@@ -0,0 +1,481 @@
+#include "config.h"
+#include "libgomp.h"
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+#include <assert.h>
+#include "hsa.h"
+#include "hsa_ext_finalize.h"
+
+struct __hsa_launch_attributes
+{
+ /* Number of dimensions the workload has. Maximum number is 3. */
+ uint32_t ndim;
+ /* Size of the grid in the respective dimensions; entries for dimensions beyond NDIM are not read. */
+ uint32_t gdims[3];
+ /* Size of work-groups in the respective dimensions; entries for dimensions beyond NDIM are not read. */
+ uint32_t wdims[3];
+ /* Number of arguments of the kernel; each one is passed as one uint64_t. */
+ uint32_t nargs;
+};
+
+struct hsa_context_info
+{
+ bool initialized; /* Set at the end of init_hsa_context. */
+ hsa_agent_t agent; /* GPU agent stored by find_gpu_agent. */
+
+ uint32_t queue_size; /* HSA_AGENT_INFO_QUEUE_MAX_SIZE of AGENT. */
+ hsa_isa_t isa; /* HSA_AGENT_INFO_ISA of AGENT. */
+ hsa_queue_t* sync_command_q; /* Queue that kernels are dispatched to. */
+ hsa_signal_t sync_signal; /* Completion signal of dispatched kernels. */
+};
+
+struct hsa_program_info
+{
+ bool created, finalized; /* Set by create_hsa_program / finalize_hsa_program. */
+ hsa_ext_program_t handle; /* Program made by hsa_ext_program_create. */
+ hsa_executable_t executable; /* Executable frozen by finalize_hsa_program. */
+};
+
+struct hsa_image_desc
+{
+ hsa_ext_module_t module; /* Module that init_hsa_image adds to the program. */
+ const char *names; /* NUL-terminated kernel names; an empty name ends the list. */
+};
+
+struct hsa_kernel_info
+{
+ bool initialized; /* Set at the end of init_hsa_kernel. */
+ const char *name; /* Points into the names of the image descriptor. */
+ hsa_executable_symbol_t symbol; /* Symbol looked up under NAME. */
+ uint64_t object; /* Kernel object handle put into dispatch packets. */
+ uint32_t kernarg_segment_size; /* Size requested for KERNARG_ADDR. */
+ uint32_t group_segment_size; /* Copied into each dispatch packet. */
+ uint32_t private_segment_size; /* Copied into each dispatch packet. */
+ void *kernarg_addr; /* Argument buffer from hsa_memory_allocate. */
+};
+
+struct hsa_image_info
+{
+ bool initialized; /* Set at the end of init_hsa_image. */
+ void ***host_functions; /* host_table passed to __hsa_register_image. */
+ struct hsa_image_desc *image_descriptor; /* target_data passed there. */
+ struct hsa_kernel_info *kernels; /* One entry per name in the descriptor. */
+};
+
+/* When true, print progress information to stderr; set from HSA_DEBUG in init_hsa_context. */
+
+static bool debug;
+
+static struct hsa_context_info hsa_context; /* Agent, queue and signal in use. */
+static struct hsa_program_info hsa_program; /* The one program and its executable. */
+
+/* FIXME: Currently we allow only one HSA image (module, HSA object file).
+ Once we move to libgomp, it will be taking care of host function <-> hsa
+ kernel mapping and we will automatically have more. */
+static struct hsa_image_info image_info;
+
+/* Callback of hsa_iterate_agents.  Record the first AGENT that is a GPU in
+   hsa_context.agent and interrupt the iteration; DATA is ignored.  */
+
+static hsa_status_t
+find_gpu_agent (hsa_agent_t agent, void *data __attribute__ ((unused)))
+{
+  hsa_device_type_t kind;
+  hsa_status_t rc = hsa_agent_get_info (agent, HSA_AGENT_INFO_DEVICE, &kind);
+
+  /* Agents whose type cannot be queried are skipped, not reported.  */
+  if (rc != HSA_STATUS_SUCCESS || kind != HSA_DEVICE_TYPE_GPU)
+    return HSA_STATUS_SUCCESS;
+
+  hsa_context.agent = agent;
+  return HSA_STATUS_INFO_BREAK;
+}
+
+/* Callback of dispatch queue to report errors: prints STATUS of QUEUE.  */
+
+static void
+queue_callback (hsa_status_t status, hsa_queue_t *queue, void *data)
+{
+  const char *message = NULL;
+  hsa_status_t res = hsa_status_string (status, &message);
+  fprintf (stderr, "Error at queue %llu: %s\n", (unsigned long long) queue->id,
+           res == HSA_STATUS_SUCCESS && message ? message : "unknown error");
+}
+
+/* Initialize context for running HSA kernels: start the HSA run-time, pick
+   a GPU agent and create the dispatch queue and the completion signal kept
+   in hsa_context.  Any failure is fatal.  */
+
+static void
+init_hsa_context (void)
+{
+  /* The mere presence of HSA_DEBUG in the environment enables tracing.  */
+  debug = getenv ("HSA_DEBUG") != NULL;
+
+  if (hsa_init () != HSA_STATUS_SUCCESS)
+    gomp_fatal ("HSA initialization failed");
+  if (debug)
+    fprintf (stderr, "HSA run-time initialized\n");
+
+  /* find_gpu_agent ends the walk with INFO_BREAK when it has found a GPU.  */
+  if (hsa_iterate_agents (find_gpu_agent, NULL) != HSA_STATUS_INFO_BREAK)
+    gomp_fatal ("Error searching for a GPU HSA agent");
+
+  if (debug)
+    {
+      char agent_name[64] = { 0 };
+
+      if (hsa_agent_get_info (hsa_context.agent, HSA_AGENT_INFO_NAME,
+                              agent_name) != HSA_STATUS_SUCCESS)
+        gomp_fatal ("Error requesting the name of the HSA agent");
+      fprintf (stderr, "Selected agent %s\n", agent_name);
+    }
+
+  if (hsa_agent_get_info (hsa_context.agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
+                          &hsa_context.queue_size) != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Error requesting maximum queue size of the HSA agent");
+  if (debug)
+    fprintf (stderr, "The maximum queue length is %u\n",
+             (unsigned int) hsa_context.queue_size);
+
+  if (hsa_agent_get_info (hsa_context.agent, HSA_AGENT_INFO_ISA,
+                          &hsa_context.isa) != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Error querying the ISA of the agent");
+
+  /* The queue gets the maximum size and queue_callback as error handler.  */
+  if (hsa_queue_create (hsa_context.agent, hsa_context.queue_size,
+                        HSA_QUEUE_TYPE_SINGLE, queue_callback, NULL,
+                        UINT32_MAX, UINT32_MAX, &hsa_context.sync_command_q)
+      != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Error creating command queue");
+
+  /* The completion signal starts at 1; __hsa_launch_kernel stores 1 into it
+     before each dispatch and then waits for it to drop below that.  */
+  if (hsa_signal_create (1, 0, NULL, &hsa_context.sync_signal)
+      != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Error creating the HSA sync signal");
+
+  if (debug)
+    fprintf (stderr, "HSA context initialized, queue has id %llu\n",
+             (long long unsigned) hsa_context.sync_command_q->id);
+
+  hsa_context.initialized = true;
+}
+
+/* Create the HSA program that modules are added to; fatal on failure.  */
+
+static void
+create_hsa_program (void)
+{
+  assert (hsa_context.initialized);
+  if (hsa_ext_program_create (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
+                              HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL,
+                              &hsa_program.handle) != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not create an HSA program");
+
+  if (debug)
+    fprintf (stderr, "Created a finalizer program\n");
+  hsa_program.created = true;
+}
+
+/* Finalize hsa_program, load it into a new executable and freeze that.  */
+
+static void
+finalize_hsa_program (void)
+{
+  hsa_ext_control_directives_t control_directives;
+  hsa_code_object_t code_object;
+  hsa_status_t status;
+
+  assert (hsa_program.created);
+  memset (&control_directives, 0, sizeof (control_directives));
+  status = hsa_ext_program_finalize (hsa_program.handle, hsa_context.isa,
+                                     HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+                                     control_directives, "",
+                                     HSA_CODE_OBJECT_TYPE_PROGRAM,
+                                     &code_object);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Finalization of the HSA program failed");
+  if (debug)
+    fprintf (stderr, "Finalization done\n");
+  hsa_ext_program_destroy (hsa_program.handle);
+
+  status = hsa_executable_create (HSA_PROFILE_FULL,
+                                  HSA_EXECUTABLE_STATE_UNFROZEN, "",
+                                  &hsa_program.executable);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not create HSA executable");
+  status = hsa_executable_load_code_object (hsa_program.executable,
+                                            hsa_context.agent, code_object, "");
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not add a code object to the HSA executable");
+  status = hsa_executable_freeze (hsa_program.executable, "");
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not freeze the HSA executable");
+  if (debug)
+    fprintf (stderr, "Froze HSA executable with the finalized code object\n");
+  hsa_program.finalized = true;
+}
+
+/* Add the module of image II to the HSA program, creating the program first
+   if necessary, and allocate one kernel descriptor per kernel name of II.
+   Only the names are filled in here; init_hsa_kernel does the rest.  */
+
+static void
+init_hsa_image (struct hsa_image_info *ii)
+{
+  hsa_status_t status;
+  const char *p;
+  int count = 0;
+  struct hsa_kernel_info *kernel;
+
+  if (hsa_program.finalized)
+    gomp_fatal ("Sorry, re-finalization not yet supported.");
+  if (!hsa_program.created)
+    create_hsa_program ();
+  status = hsa_ext_program_add_module (hsa_program.handle,
+                                       ii->image_descriptor->module);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not add a module to the HSA program");
+  if (debug)
+    fprintf (stderr, "Added a module to the HSA program\n");
+
+  /* The names are NUL-terminated and an empty name ends the list.  */
+  p = ii->image_descriptor->names;
+  while (*p)
+    {
+      count++;
+      p += strlen (p) + 1;
+    }
+
+  if (debug)
+    fprintf (stderr, "Encountered %d kernels in an image\n", count);
+
+  ii->kernels = gomp_malloc_cleared (sizeof (struct hsa_kernel_info) * count);
+  if (!ii->kernels)
+    gomp_fatal ("Could not allocate memory for HSA kernel descriptors");
+
+  /* Let each descriptor point at its name inside the list.  */
+  p = ii->image_descriptor->names;
+  kernel = ii->kernels;
+  while (*p)
+    {
+      kernel->name = p;
+      kernel++;
+      p += strlen (p) + 1;
+    }
+
+  ii->initialized = true;
+}
+
+/* Callback of hsa_agent_iterate_regions.  If REGION is in the global segment
+   and has the KERNARG flag, store it to the hsa_region_t DATA points to and
+   interrupt the iteration; errors of the queries are returned as they are.  */
+
+static hsa_status_t
+get_kernarg_memory_region (hsa_region_t region, void *data)
+{
+  hsa_region_segment_t seg;
+  uint32_t global_flags;
+  hsa_status_t rc;
+
+  rc = hsa_region_get_info (region, HSA_REGION_INFO_SEGMENT, &seg);
+  if (rc != HSA_STATUS_SUCCESS || seg != HSA_REGION_SEGMENT_GLOBAL)
+    return rc;
+
+  rc = hsa_region_get_info (region, HSA_REGION_INFO_GLOBAL_FLAGS,
+                            &global_flags);
+  if (rc != HSA_STATUS_SUCCESS)
+    return rc;
+  if (!(global_flags & HSA_REGION_GLOBAL_FLAG_KERNARG))
+    return HSA_STATUS_SUCCESS;
+
+  /* Hand the region back through DATA and end the walk.  */
+  *(hsa_region_t *) data = region;
+  return HSA_STATUS_INFO_BREAK;
+}
+
+/* Look up kernel KI in the HSA executable, query its object handle and
+   segment sizes and allocate its kernel argument buffer.  II is not used.  */
+
+static void
+init_hsa_kernel (struct hsa_kernel_info *ki, struct hsa_image_info *ii)
+{
+  hsa_status_t status;
+
+  status = hsa_executable_get_symbol (hsa_program.executable, NULL,
+                                      ki->name, hsa_context.agent, 0,
+                                      &ki->symbol);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not find symbol for kernel in the code object");
+  if (debug)
+    fprintf (stderr, "Located kernel %s\n", ki->name);
+
+  status = hsa_executable_symbol_get_info
+    (ki->symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &ki->object);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not extract a kernel object from its symbol");
+  status = hsa_executable_symbol_get_info
+    (ki->symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
+     &ki->kernarg_segment_size);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not get info about kernel argument size");
+  status = hsa_executable_symbol_get_info
+    (ki->symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
+     &ki->group_segment_size);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not get info about kernel group segment size");
+  status = hsa_executable_symbol_get_info
+    (ki->symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
+     &ki->private_segment_size);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not get info about kernel private segment size");
+
+  hsa_region_t kernarg_region = { .handle = (uint64_t) -1 }; /* Not found.  */
+  hsa_agent_iterate_regions (hsa_context.agent, get_kernarg_memory_region,
+                             &kernarg_region);
+  if (kernarg_region.handle == (uint64_t) -1)
+    gomp_fatal ("Could not find suitable memory region for kernel arguments");
+
+  /* Allocate the kernel argument buffer from the correct region.  */
+  status = hsa_memory_allocate (kernarg_region, ki->kernarg_segment_size,
+                                &ki->kernarg_addr);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not allocate memory for kernel arguments");
+
+  if (debug)
+    {
+      fprintf (stderr, "Kernel structure for %s fully initialized\n", ki->name);
+      fprintf (stderr, " group_segment_size: %u\n",
+               (unsigned) ki->group_segment_size);
+      fprintf (stderr, " private_segment_size: %u\n",
+               (unsigned) ki->private_segment_size);
+      fprintf (stderr, " kernarg_segment_size: %u\n",
+               (unsigned) ki->kernarg_segment_size);
+      fprintf (stderr, " kernarg_addr: %p\n", ki->kernarg_addr);
+    }
+  ki->initialized = true;
+}
+
+/* Run the HSA kernel registered for HOST_FN and wait until it completes.
+   ATTRS gives the number of dimensions, the grid and work-group sizes and
+   the number of arguments; ARGS points to that many 64-bit values, which
+   are copied into the kernel argument buffer.  The HSA context, image,
+   program and kernel are initialized on first use; any failure is fatal.  */
+
+void
+__hsa_launch_kernel (void *host_fn, struct __hsa_launch_attributes *attrs,
+                     uint64_t *args)
+{
+  struct hsa_kernel_info *ki;
+  hsa_kernel_dispatch_packet_t *packet;
+  void **hf;
+  uint16_t header;
+  uint64_t index;
+  int i;
+
+  if (!image_info.host_functions)
+    gomp_fatal ("Call to launch_kernel before register_image");
+
+  if (!hsa_context.initialized)
+    init_hsa_context ();
+  if (!image_info.initialized)
+    init_hsa_image (&image_info);
+  if (!hsa_program.finalized)
+    finalize_hsa_program ();
+
+  /* The position of HOST_FN in the host function table selects the kernel.
+     NOTE(review): an entry is read before HF is compared with
+     host_functions[1]; confirm that is the last entry, not one past it.  */
+  i = 0;
+  hf = image_info.host_functions[0];
+  while (*hf != host_fn)
+    {
+      if (hf == image_info.host_functions[1])
+        gomp_fatal ("Could not map host function to an HSA kernel");
+      hf++;
+      i++;
+    }
+  if (debug)
+    fprintf (stderr, "Identified kernel %d\n", i);
+  ki = &image_info.kernels[i];
+
+  if (!ki->initialized)
+    init_hsa_kernel (ki, &image_info);
+
+  index = hsa_queue_add_write_index_relaxed (hsa_context.sync_command_q, 1);
+  if (debug)
+    fprintf (stderr, "Got AQL index %llu\n", (unsigned long long) index);
+  /* The write index only ever grows, so wrap it to find the packet slot;
+     this relies on the queue size being a power of two.  Each launch waits
+     for its completion signal below before this function returns.  */
+  packet = ((hsa_kernel_dispatch_packet_t *) hsa_context
+            .sync_command_q->base_address)
+           + (index & (hsa_context.sync_command_q->size - 1));
+  hsa_signal_store_relaxed (hsa_context.sync_signal, 1);
+
+  /* Clear everything behind the first four bytes (header and setup).  The
+     header is stored last; setup is assigned rather than or-ed because the
+     memset does not clear it and a reused slot still holds an old value.  */
+  memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
+  packet->setup = (uint16_t) attrs->ndim
+                  << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+  packet->grid_size_x = attrs->gdims[0];
+  packet->workgroup_size_x = attrs->wdims[0];
+  packet->grid_size_y = attrs->ndim > 1 ? attrs->gdims[1] : 1;
+  packet->workgroup_size_y = attrs->ndim > 1 ? attrs->wdims[1] : 1;
+  packet->grid_size_z = attrs->ndim > 2 ? attrs->gdims[2] : 1;
+  packet->workgroup_size_z = attrs->ndim > 2 ? attrs->wdims[2] : 1;
+
+  packet->private_segment_size = ki->private_segment_size;
+  packet->group_segment_size = ki->group_segment_size;
+  packet->kernel_object = ki->object;
+  packet->kernarg_address = ki->kernarg_addr;
+  packet->completion_signal = hsa_context.sync_signal;
+
+  if (debug)
+    fprintf (stderr, "Copying %u arguments, total %llu bytes, from %p\n",
+             (unsigned) attrs->nargs,
+             (unsigned long long) sizeof (uint64_t) * attrs->nargs,
+             (void *) args);
+  memcpy (ki->kernarg_addr, args, sizeof (uint64_t) * attrs->nargs);
+
+  header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+  if (debug)
+    fprintf (stderr, "Going to dispatch the kernel\n");
+
+  /* Store the header with release ordering, then signal the doorbell.  */
+  __atomic_store_n ((uint16_t *) (&packet->header), header, __ATOMIC_RELEASE);
+  hsa_signal_store_relaxed (hsa_context.sync_command_q->doorbell_signal, index);
+
+  if (debug)
+    fprintf (stderr, "Kernel dispatched, waiting for completion\n");
+
+  hsa_signal_wait_acquire (hsa_context.sync_signal, HSA_SIGNAL_CONDITION_LT, 1,
+                           UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+  if (debug)
+    fprintf (stderr, "Kernel returned\n");
+}
+
+/* Register the one supported HSA image: HOST_TABLE becomes the host function
+   table and TARGET_DATA the image descriptor.  NULL arguments and a second
+   registration are fatal errors.  */
+
+void
+__hsa_register_image (void *host_table,
+                      /* enum offload_target_type target_type,*/
+                      void *target_data)
+{
+  if (host_table == NULL || target_data == NULL)
+    gomp_fatal ("Invalid image registration parameters");
+  if (image_info.host_functions != NULL)
+    gomp_fatal ("At this point we only allow one HSA module per program");
+  image_info.image_descriptor = target_data;
+  image_info.host_functions = host_table;
+}
new file mode 100644
@@ -0,0 +1,3724 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// Copyright 2014 ADVANCED MICRO DEVICES, INC.
+//
+// AMD is granting you permission to use this software and documentation(if any)
+// (collectively, the "Materials") pursuant to the terms and conditions of the
+// Software License Agreement included with the Materials.If you do not have a
+// copy of the Software License Agreement, contact your AMD representative for a
+// copy.
+//
+// You agree that you will not reverse engineer or decompile the Materials, in
+// whole or in part, except as allowed by applicable law.
+//
+// WARRANTY DISCLAIMER : THE SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND.AMD DISCLAIMS ALL WARRANTIES, EXPRESS, IMPLIED, OR STATUTORY,
+// INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON - INFRINGEMENT, THAT THE
+// SOFTWARE WILL RUN UNINTERRUPTED OR ERROR - FREE OR WARRANTIES ARISING FROM
+// CUSTOM OF TRADE OR COURSE OF USAGE.THE ENTIRE RISK ASSOCIATED WITH THE USE OF
+// THE SOFTWARE IS ASSUMED BY YOU.Some jurisdictions do not allow the exclusion
+// of implied warranties, so the above exclusion may not apply to You.
+//
+// LIMITATION OF LIABILITY AND INDEMNIFICATION : AMD AND ITS LICENSORS WILL NOT,
+// UNDER ANY CIRCUMSTANCES BE LIABLE TO YOU FOR ANY PUNITIVE, DIRECT,
+// INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM USE OF
+// THE SOFTWARE OR THIS AGREEMENT EVEN IF AMD AND ITS LICENSORS HAVE BEEN
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.In no event shall AMD's total
+// liability to You for all damages, losses, and causes of action (whether in
+// contract, tort (including negligence) or otherwise) exceed the amount of $100
+// USD. You agree to defend, indemnify and hold harmless AMD and its licensors,
+// and any of their directors, officers, employees, affiliates or agents from
+// and against any and all loss, damage, liability and other expenses (including
+// reasonable attorneys' fees), resulting from Your use of the Software or
+// violation of the terms and conditions of this Agreement.
+//
+// U.S.GOVERNMENT RESTRICTED RIGHTS : The Materials are provided with
+// "RESTRICTED RIGHTS." Use, duplication, or disclosure by the Government is
+// subject to the restrictions as set forth in FAR 52.227 - 14 and DFAR252.227 -
+// 7013, et seq., or its successor.Use of the Materials by the Government
+// constitutes acknowledgement of AMD's proprietary rights in them.
+//
+// EXPORT RESTRICTIONS: The Materials may be subject to export restrictions as
+// stated in the Software License Agreement.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef HSA_RUNTIME_INC_HSA_H_
+#define HSA_RUNTIME_INC_HSA_H_
+
+#include <stddef.h> /* size_t */
+#include <stdint.h> /* uintXX_t */
+#ifndef __cplusplus
+#include <stdbool.h>
+#endif /* __cplusplus */
+
+// Placeholder for calling convention and import macros
+#define HSA_CALL
+#undef HSA_API
+#define HSA_API HSA_CALL
+
+// Detect and set large model builds.
+#undef HSA_LARGE_MODEL
+#if defined(__LP64__) || defined(_M_X64)
+#define HSA_LARGE_MODEL
+#endif
+
+// Try to detect CPU endianness
+#if !defined(LITTLEENDIAN_CPU) && !defined(BIGENDIAN_CPU)
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
+ defined(_M_X64)
+#define LITTLEENDIAN_CPU
+#endif
+#endif
+
+#undef HSA_LITTLE_ENDIAN
+#if defined(LITTLEENDIAN_CPU)
+#define HSA_LITTLE_ENDIAN
+#elif defined(BIGENDIAN_CPU)
+#else
+#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
+#endif
+
+#define OBSIDIAN_RUNTIME
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** \defgroup status Runtime Notifications
+ * @{
+ */
+
+/**
+ * @brief Status codes.
+ */
+typedef enum {
+ /**
+ * The function has been executed successfully.
+ */
+ HSA_STATUS_SUCCESS = 0x0,
+ /**
+ * A traversal over a list of elements has been interrupted by the
+ * application before completing.
+ */
+ HSA_STATUS_INFO_BREAK = 0x1,
+ /**
+ * A generic error has occurred.
+ */
+ HSA_STATUS_ERROR = 0x1000,
+ /**
+ * One of the actual arguments does not meet a precondition stated in the
+ * documentation of the corresponding formal argument.
+ */
+ HSA_STATUS_ERROR_INVALID_ARGUMENT = 0x1001,
+ /**
+ * The requested queue creation is not valid.
+ */
+ HSA_STATUS_ERROR_INVALID_QUEUE_CREATION = 0x1002,
+ /**
+ * The requested allocation is not valid.
+ */
+ HSA_STATUS_ERROR_INVALID_ALLOCATION = 0x1003,
+ /**
+ * The agent is invalid.
+ */
+ HSA_STATUS_ERROR_INVALID_AGENT = 0x1004,
+ /**
+ * The memory region is invalid.
+ */
+ HSA_STATUS_ERROR_INVALID_REGION = 0x1005,
+ /**
+ * The signal is invalid.
+ */
+ HSA_STATUS_ERROR_INVALID_SIGNAL = 0x1006,
+ /**
+ * The queue is invalid.
+ */
+ HSA_STATUS_ERROR_INVALID_QUEUE = 0x1007,
+ /**
+ * The HSA runtime failed to allocate the necessary resources. This error
+ * may also occur when the HSA runtime needs to spawn threads or create
+ * internal OS-specific events.
+ */
+ HSA_STATUS_ERROR_OUT_OF_RESOURCES = 0x1008,
+ /**
+ * The AQL packet is malformed.
+ */
+ HSA_STATUS_ERROR_INVALID_PACKET_FORMAT = 0x1009,
+ /**
+ * An error has been detected while releasing a resource.
+ */
+ HSA_STATUS_ERROR_RESOURCE_FREE = 0x100A,
+ /**
+ * An API other than ::hsa_init has been invoked while the reference count
+ * of the HSA runtime is 0.
+ */
+ HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B,
+ /**
+ * The maximum reference count for the object has been reached.
+ */
+ HSA_STATUS_ERROR_REFCOUNT_OVERFLOW = 0x100C,
+ /**
+ * The arguments passed to a function are not compatible.
+ */
+ HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS = 0x100D,
+ /**
+ * The index is invalid.
+ */
+ HSA_STATUS_ERROR_INVALID_INDEX = 0x100E,
+ /**
+ * The instruction set architecture is invalid.
+ */
+ HSA_STATUS_ERROR_INVALID_ISA = 0x100F,
+ /**
+ * The instruction set architecture name is invalid.
+ */
+ HSA_STATUS_ERROR_INVALID_ISA_NAME = 0x1017,
+ /**
+ * The code object is invalid.
+ */
+ HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010,
+ /**
+ * The executable is invalid.
+ */
+ HSA_STATUS_ERROR_INVALID_EXECUTABLE = 0x1011,
+ /**
+ * The executable is frozen.
+ */
+ HSA_STATUS_ERROR_FROZEN_EXECUTABLE = 0x1012,
+ /**
+ * There is no symbol with the given name.
+ */
+ HSA_STATUS_ERROR_INVALID_SYMBOL_NAME = 0x1013,
+ /**
+ * The variable is already defined.
+ */
+ HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED = 0x1014,
+ /**
+ * The variable is undefined.
+ */
+ HSA_STATUS_ERROR_VARIABLE_UNDEFINED = 0x1015,
+ /**
+ * An HSAIL operation resulted in a hardware exception.
+ */
+ HSA_STATUS_ERROR_EXCEPTION = 0x1016
+} hsa_status_t;
+
+/**
+ * @brief Query additional information about a status code.
+ *
+ * @param[in] status Status code.
+ *
+ * @param[out] status_string A NUL-terminated string that describes the error
+ * status.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p status is an invalid
+ * status code, or @p status_string is NULL.
+ */
+hsa_status_t HSA_API
+ hsa_status_string(hsa_status_t status, const char **status_string);
+
+/** @} */
+
+/** \defgroup common Common Definitions
+ * @{
+ */
+
+/**
+ * @brief Three-dimensional coordinate.
+ */
+typedef struct hsa_dim3_s {
+ /**
+ * X dimension.
+ */
+ uint32_t x;
+
+ /**
+ * Y dimension.
+ */
+ uint32_t y;
+
+ /**
+ * Z dimension.
+ */
+ uint32_t z;
+} hsa_dim3_t;
+
+/**
+ * @brief Access permissions.
+ */
+typedef enum {
+ /**
+ * Read-only access.
+ */
+ HSA_ACCESS_PERMISSION_RO = 1,
+ /**
+ * Write-only access.
+ */
+ HSA_ACCESS_PERMISSION_WO = 2,
+ /**
+ * Read and write access.
+ */
+ HSA_ACCESS_PERMISSION_RW = 3
+} hsa_access_permission_t;
+
+/** @} **/
+
+/** \defgroup initshutdown Initialization and Shut Down
+ * @{
+ */
+
+/**
+ * @brief Initialize the HSA runtime.
+ *
+ * @details Initializes the HSA runtime if it is not already initialized, and
+ * increases the reference counter associated with the HSA runtime for the
+ * current process. Invocation of any HSA function other than ::hsa_init results
+ * in undefined behavior if the current HSA runtime reference counter is less
+ * than one.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is failure to allocate
+ * the resources required by the implementation.
+ *
+ * @retval ::HSA_STATUS_ERROR_REFCOUNT_OVERFLOW The HSA runtime reference
+ * count reaches INT32_MAX.
+ */
+hsa_status_t HSA_API hsa_init();
+
+/**
+ * @brief Shut down the HSA runtime.
+ *
+ * @details Decreases the reference count of the HSA runtime instance. When the
+ * reference count reaches 0, the HSA runtime is no longer considered valid
+ * but the application might call ::hsa_init to initialize the HSA runtime
+ * again.
+ *
+ * Once the reference count of the HSA runtime reaches 0, all the resources
+ * associated with it (queues, signals, agent information, etc.) are
+ * considered invalid and any attempt to reference them in subsequent API calls
+ * results in undefined behavior. When the reference count reaches 0, the HSA
+ * runtime may release resources associated with it.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ */
+hsa_status_t HSA_API hsa_shut_down();
+
+/** @} **/
+
+/** \defgroup agentinfo System and Agent Information
+ * @{
+ */
+
+/**
+ * @brief Endianness. A convention used to interpret the bytes making up a data
+ * word.
+ */
+typedef enum {
+ /**
+ * The least significant byte is stored in the smallest address.
+ */
+ HSA_ENDIANNESS_LITTLE = 0,
+ /**
+ * The most significant byte is stored in the smallest address.
+ */
+ HSA_ENDIANNESS_BIG = 1
+} hsa_endianness_t;
+
+/**
+ * @brief Machine model. A machine model determines the size of certain data
+ * types in HSA runtime and an agent.
+ */
+typedef enum {
+ /**
+ * Small machine model. Addresses use 32 bits.
+ */
+ HSA_MACHINE_MODEL_SMALL = 0,
+ /**
+ * Large machine model. Addresses use 64 bits.
+ */
+ HSA_MACHINE_MODEL_LARGE = 1
+} hsa_machine_model_t;
+
+/**
+ * @brief Profile. A profile indicates a particular level of feature
+ * support. For example, in the base profile the application must use the HSA
+ * runtime allocator to reserve Shared Virtual Memory, while in the full profile
+ * any host pointer can be shared across all the agents.
+ */
+typedef enum {
+ /**
+ * Base profile.
+ */
+ HSA_PROFILE_BASE = 0,
+ /**
+ * Full profile.
+ */
+ HSA_PROFILE_FULL = 1
+} hsa_profile_t;
+
+/**
+ * @brief System attributes.
+ */
+typedef enum {
+ /**
+ * Major version of the HSA runtime specification supported by the
+ * implementation. The type of this attribute is uint16_t.
+ */
+ HSA_SYSTEM_INFO_VERSION_MAJOR = 0,
+ /**
+ * Minor version of the HSA runtime specification supported by the
+ * implementation. The type of this attribute is uint16_t.
+ */
+ HSA_SYSTEM_INFO_VERSION_MINOR = 1,
+ /**
+ * Current timestamp. The value of this attribute monotonically increases at a
+ * constant rate. The type of this attribute is uint64_t.
+ */
+ HSA_SYSTEM_INFO_TIMESTAMP = 2,
+ /**
+ * Timestamp value increase rate, in Hz. The timestamp (clock) frequency is
+ * in the range 1-400MHz. The type of this attribute is uint64_t.
+ */
+ HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY = 3,
+ /**
+ * Maximum duration of a signal wait operation. Expressed as a count based on
+ * the timestamp frequency. The type of this attribute is uint64_t.
+ */
+ HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT = 4,
+ /**
+ * Endianness of the system. The type of this attribute is ::hsa_endianness_t.
+ */
+ HSA_SYSTEM_INFO_ENDIANNESS = 5,
+ /**
+ * Machine model supported by the HSA runtime. The type of this attribute is
+ * ::hsa_machine_model_t.
+ */
+ HSA_SYSTEM_INFO_MACHINE_MODEL = 6,
+ /**
+ * Bit-mask indicating which extensions are supported by the
+ * implementation. An extension with an ID of @p i is supported if the bit at
+ * position @p i is set. The type of this attribute is uint8_t[128].
+ */
+ HSA_SYSTEM_INFO_EXTENSIONS = 7
+} hsa_system_info_t;
+
+/**
+ * @brief Get the current value of a system attribute.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * system attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API
+ hsa_system_get_info(hsa_system_info_t attribute, void *value);
+
+/**
+ * @brief HSA extensions.
+ */
+typedef enum {
+ /**
+ * Finalizer extension.
+ */
+ HSA_EXTENSION_FINALIZER = 0,
+ /**
+ * Images extension.
+ */
+ HSA_EXTENSION_IMAGES = 1,
+ HSA_EXTENSION_AMD_PROFILER = 2
+} hsa_extension_t;
+
+/**
+ * @brief Query if a given version of an extension is supported by the HSA
+ * implementation.
+ *
+ * @param[in] extension Extension identifier.
+ *
+ * @param[in] version_major Major version number.
+ *
+ * @param[in] version_minor Minor version number.
+ *
+ * @param[out] result Pointer to a memory location where the HSA runtime stores
+ * the result of the check. The result is true if the specified version of the
+ * extension is supported, and false otherwise.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
+ * extension, or @p result is NULL.
+ */
+hsa_status_t HSA_API
+ hsa_system_extension_supported(uint16_t extension, uint16_t version_major,
+ uint16_t version_minor, bool *result);
+
+/**
+ * @brief Retrieve the function pointers corresponding to a given version of an
+ * extension. Portable applications are expected to invoke the extension API
+ * using the returned function pointers.
+ *
+ * @details The application is responsible for verifying that the given version
+ * of the extension is supported by the HSA implementation (see
+ * ::hsa_system_extension_supported). If the given combination of extension,
+ * major version, and minor version is not supported by the implementation, the
+ * behavior is undefined.
+ *
+ * @param[in] extension Extension identifier.
+ *
+ * @param[in] version_major Major version number for which to retrieve the
+ * function pointer table.
+ *
+ * @param[in] version_minor Minor version number for which to retrieve the
+ * function pointer table.
+ *
+ * @param[out] table Pointer to an application-allocated function pointer table
+ * that is populated by the HSA runtime. Must not be NULL. The memory associated
+ * with @p table can be reused or freed after the function returns.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
+ * extension, or @p table is NULL.
+ */
+hsa_status_t HSA_API
+ hsa_system_get_extension_table(uint16_t extension, uint16_t version_major,
+ uint16_t version_minor, void *table);
+
+/**
+ * @brief Opaque handle representing an agent, a device that participates in
+ * the HSA memory model. An agent can submit AQL packets for execution, and
+ * may also accept AQL packets for execution (agent dispatch packets or kernel
+ * dispatch packets launching HSAIL-derived binaries).
+ */
+typedef struct hsa_agent_s {
+ /**
+ * Opaque handle.
+ */
+ uint64_t handle;
+} hsa_agent_t;
+
+/**
+ * @brief Agent features.
+ */
+typedef enum {
+ /**
+ * The agent supports AQL packets of kernel dispatch type. If this
+ * feature is enabled, the agent is also a kernel agent.
+ */
+ HSA_AGENT_FEATURE_KERNEL_DISPATCH = 1,
+ /**
+ * The agent supports AQL packets of agent dispatch type.
+ */
+ HSA_AGENT_FEATURE_AGENT_DISPATCH = 2
+} hsa_agent_feature_t;
+
+/**
+ * @brief Hardware device type.
+ */
+typedef enum {
+ /**
+ * CPU device.
+ */
+ HSA_DEVICE_TYPE_CPU = 0,
+ /**
+ * GPU device.
+ */
+ HSA_DEVICE_TYPE_GPU = 1,
+ /**
+ * DSP device.
+ */
+ HSA_DEVICE_TYPE_DSP = 2
+} hsa_device_type_t;
+
+/**
+ * @brief Default floating-point rounding mode.
+ */
+typedef enum {
+ /**
+ * Use a default floating-point rounding mode specified elsewhere.
+ */
+ HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT = 0,
+ /**
+ * Operations that specify the default floating-point mode are rounded to zero
+ * by default.
+ */
+ HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO = 1,
+ /**
+ * Operations that specify the default floating-point mode are rounded to the
+ * nearest representable number, with ties broken by selecting the value
+ * with an even least significant bit.
+ */
+ HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR = 2
+} hsa_default_float_rounding_mode_t;
+
+/**
+ * @brief Agent attributes that can be queried with ::hsa_agent_get_info.
+ */
+typedef enum {
+ /**
+ * Agent name. The type of this attribute is a NUL-terminated char[64]. If
+ * the name of the agent uses fewer than 63 characters, the rest of the
+ * array must be filled with NULs.
+ */
+ HSA_AGENT_INFO_NAME = 0,
+ /**
+ * Name of vendor. The type of this attribute is a NUL-terminated char[64]. If
+ * the name of the vendor uses fewer than 63 characters, the rest of the array
+ * must be filled with NULs.
+ */
+ HSA_AGENT_INFO_VENDOR_NAME = 1,
+ /**
+ * Agent capability. The type of this attribute is ::hsa_agent_feature_t.
+ */
+ HSA_AGENT_INFO_FEATURE = 2,
+ /**
+ * Machine model supported by the agent. The type of this attribute is
+ * ::hsa_machine_model_t.
+ */
+ HSA_AGENT_INFO_MACHINE_MODEL = 3,
+ /**
+ * Profile supported by the agent. The type of this attribute is
+ * ::hsa_profile_t.
+ */
+ HSA_AGENT_INFO_PROFILE = 4,
+ /**
+ * Default floating-point rounding mode. The type of this attribute is
+ * ::hsa_default_float_rounding_mode_t, but the value
+ * ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT is not allowed.
+ */
+ HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5,
+ /**
+ * Default floating-point rounding modes supported by the agent in the Base
+ * profile. The type of this attribute is a mask of
+ * ::hsa_default_float_rounding_mode_t. The default floating-point rounding
+ * mode (::HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE) bit must not be set.
+ */
+ HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 23,
+ /**
+ * Flag indicating that the f16 HSAIL operation is at least as fast as the
+ * f32 operation in the current agent. The value of this attribute is
+ * undefined if the agent is not a kernel agent. The type of this
+ * attribute is bool.
+ */
+ HSA_AGENT_INFO_FAST_F16_OPERATION = 24,
+ /**
+ * Number of work-items in a wavefront. Must be a power of 2 in the range
+ * [1,256]. The value of this attribute is undefined if the agent is not
+ * a kernel agent. The type of this attribute is uint32_t.
+ */
+ HSA_AGENT_INFO_WAVEFRONT_SIZE = 6,
+ /**
+ * Maximum number of work-items of each dimension of a work-group. Each
+ * maximum must be greater than 0. No maximum can exceed the value of
+ * ::HSA_AGENT_INFO_WORKGROUP_MAX_SIZE. The value of this attribute is
+ * undefined if the agent is not a kernel agent. The type of this
+ * attribute is uint16_t[3].
+ */
+ HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7,
+ /**
+ * Maximum total number of work-items in a work-group. The value of this
+ * attribute is undefined if the agent is not a kernel agent. The type
+ * of this attribute is uint32_t.
+ */
+ HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8,
+ /**
+ * Maximum number of work-items of each dimension of a grid. Each maximum must
+ * be greater than 0, and must not be smaller than the corresponding value in
+ * ::HSA_AGENT_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of
+ * ::HSA_AGENT_INFO_GRID_MAX_SIZE. The value of this attribute is undefined if
+ * the agent is not a kernel agent. The type of this attribute is
+ * ::hsa_dim3_t.
+ */
+ HSA_AGENT_INFO_GRID_MAX_DIM = 9,
+ /**
+ * Maximum total number of work-items in a grid. The value of this attribute
+ * is undefined if the agent is not a kernel agent. The type of this
+ * attribute is uint32_t.
+ */
+ HSA_AGENT_INFO_GRID_MAX_SIZE = 10,
+ /**
+ * Maximum number of fbarriers per work-group. Must be at least 32. The value
+ * of this attribute is undefined if the agent is not a kernel agent. The
+ * type of this attribute is uint32_t.
+ */
+ HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11,
+ /**
+ * Maximum number of queues that can be active (created but not destroyed) at
+ * one time in the agent. The type of this attribute is uint32_t.
+ */
+ HSA_AGENT_INFO_QUEUES_MAX = 12,
+ /**
+ * Minimum number of packets that a queue created in the agent
+ * can hold. Must be a power of 2 greater than 0. Must not exceed
+ * the value of ::HSA_AGENT_INFO_QUEUE_MAX_SIZE. The type of this
+ * attribute is uint32_t.
+ */
+ HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13,
+ /**
+ * Maximum number of packets that a queue created in the agent can
+ * hold. Must be a power of 2 greater than 0. The type of this attribute
+ * is uint32_t.
+ */
+ HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14,
+ /**
+ * Type of a queue created in the agent. The type of this attribute is
+ * ::hsa_queue_type_t.
+ */
+ HSA_AGENT_INFO_QUEUE_TYPE = 15,
+ /**
+ * Identifier of the NUMA node associated with the agent. The type of this
+ * attribute is uint32_t.
+ */
+ HSA_AGENT_INFO_NODE = 16,
+ /**
+ * Type of hardware device associated with the agent. The type of this
+ * attribute is ::hsa_device_type_t.
+ */
+ HSA_AGENT_INFO_DEVICE = 17,
+ /**
+ * Array of data cache sizes (L1..L4). Each size is expressed in bytes. A size
+ * of 0 for a particular level indicates that there is no cache information
+ * for that level. The type of this attribute is uint32_t[4].
+ */
+ HSA_AGENT_INFO_CACHE_SIZE = 18,
+ /**
+ * Instruction set architecture of the agent. The type of this attribute
+ * is ::hsa_isa_t.
+ */
+ HSA_AGENT_INFO_ISA = 19,
+ /**
+ * Bit-mask indicating which extensions are supported by the agent. An
+ * extension with an ID of @p i is supported if the bit at position @p i is
+ * set. The type of this attribute is uint8_t[128].
+ */
+ HSA_AGENT_INFO_EXTENSIONS = 20,
+ /**
+ * Major version of the HSA runtime specification supported by the
+ * agent. The type of this attribute is uint16_t.
+ */
+ HSA_AGENT_INFO_VERSION_MAJOR = 21,
+ /**
+ * Minor version of the HSA runtime specification supported by the
+ * agent. The type of this attribute is uint16_t.
+ */
+ HSA_AGENT_INFO_VERSION_MINOR = 22,
+ HSA_AGENT_INFO_COUNT = 25 /* One past the largest attribute value above (0..24). */
+} hsa_agent_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given agent.
+ *
+ * @param[in] agent A valid agent.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * agent attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_agent_get_info(hsa_agent_t agent,
+ hsa_agent_info_t attribute,
+ void *value);
+
+/**
+ * @brief Iterate over the available agents, and invoke an
+ * application-defined callback on every iteration.
+ *
+ * @param[in] callback Callback to be invoked once per agent. The HSA
+ * runtime passes two arguments to the callback, the agent and the
+ * application data. If @p callback returns a status other than
+ * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
+ * ::hsa_iterate_agents returns that status value.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * iteration. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */
+hsa_status_t HSA_API
+ hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void *data),
+ void *data);
+
+/*
+
+// If we do not know the size of an attribute, we need to query it first
+// Note: this API will not be in the spec unless needed
+hsa_status_t HSA_API hsa_agent_get_info_size(
+ hsa_agent_t agent,
+ hsa_agent_info_t attribute,
+ size_t* size);
+
+// Set the value of an agent's attribute
+// Note: this API will not be in the spec unless needed
+hsa_status_t HSA_API hsa_agent_set_info(
+ hsa_agent_t agent,
+ hsa_agent_info_t attribute,
+ void* value);
+
+*/
+
+/**
+ * @brief Exception policies applied in the presence of hardware exceptions.
+ */
+typedef enum {
+ /**
+ * If a hardware exception is detected, a work-item signals an exception.
+ */
+ HSA_EXCEPTION_POLICY_BREAK = 1,
+ /**
+ * If a hardware exception is detected, a hardware status bit is set.
+ */
+ HSA_EXCEPTION_POLICY_DETECT = 2
+} hsa_exception_policy_t;
+
+/**
+ * @brief Retrieve the exception policy support for a given combination of
+ * agent and profile.
+ *
+ * @param[in] agent Agent.
+ *
+ * @param[in] profile Profile.
+ *
+ * @param[out] mask Pointer to a memory location where the HSA runtime stores a
+ * mask of ::hsa_exception_policy_t values. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid
+ * profile, or @p mask is NULL.
+ *
+ */
+hsa_status_t HSA_API hsa_agent_get_exception_policies(hsa_agent_t agent,
+ hsa_profile_t profile,
+ uint16_t *mask);
+
+/**
+ * @brief Query if a given version of an extension is supported by an agent.
+ *
+ * @param[in] extension Extension identifier.
+ *
+ * @param[in] agent Agent.
+ *
+ * @param[in] version_major Major version number.
+ *
+ * @param[in] version_minor Minor version number.
+ *
+ * @param[out] result Pointer to a memory location where the HSA runtime stores
+ * the result of the check. The result is true if the specified version of the
+ * extension is supported, and false otherwise.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
+ * extension, or @p result is NULL.
+ */
+hsa_status_t HSA_API
+ hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent,
+ uint16_t version_major,
+ uint16_t version_minor, bool *result);
+
+/** @} */
+
+/** \defgroup signals Signals
+ * @{
+ */
+
+/**
+ * @brief Signal handle.
+ */
+typedef struct hsa_signal_s {
+ /**
+ * Opaque handle. The value 0 is reserved.
+ */
+ uint64_t handle;
+} hsa_signal_t;
+
+/**
+ * @brief Signal value. The value occupies 64 bits when HSA_LARGE_MODEL is
+ * defined (large machine model), and 32 bits otherwise (small machine model).
+ */
+#ifdef HSA_LARGE_MODEL
+typedef int64_t hsa_signal_value_t;
+#else
+typedef int32_t hsa_signal_value_t;
+#endif
+
+/**
+ * @brief Create a signal.
+ *
+ * @param[in] initial_value Initial value of the signal.
+ *
+ * @param[in] num_consumers Size of @p consumers. A value of 0 indicates that
+ * any agent might wait on the signal.
+ *
+ * @param[in] consumers List of agents that might consume (wait on) the
+ * signal. If @p num_consumers is 0, this argument is ignored; otherwise, the
+ * HSA runtime might use the list to optimize the handling of the signal
+ * object. If an agent not listed in @p consumers waits on the returned
+ * signal, the behavior is undefined. The memory associated with @p consumers
+ * can be reused or freed after the function returns.
+ *
+ * @param[out] signal Pointer to a memory location where the HSA runtime will
+ * store the newly created signal handle.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * the resources required by the implementation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is NULL, @p
+ * num_consumers is greater than 0 but @p consumers is NULL, or @p consumers
+ * contains duplicates.
+ */
+hsa_status_t HSA_API
+ hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
+ const hsa_agent_t *consumers, hsa_signal_t *signal);
+
+/**
+ * @brief Destroy a signal previously created by ::hsa_signal_create.
+ *
+ * @param[in] signal Signal.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p signal is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The handle in @p signal is 0.
+ */
+hsa_status_t HSA_API hsa_signal_destroy(hsa_signal_t signal);
+
+/**
+ * @brief Atomically read the current value of a signal.
+ *
+ * @param[in] signal Signal.
+ *
+ * @return Value of the signal.
+ */
+hsa_signal_value_t HSA_API hsa_signal_load_acquire(hsa_signal_t signal);
+
+/**
+ * @copydoc hsa_signal_load_acquire
+ */
+hsa_signal_value_t HSA_API hsa_signal_load_relaxed(hsa_signal_t signal);
+
+/**
+ * @brief Atomically set the value of a signal.
+ *
+ * @details If the value of the signal is changed, all the agents waiting
+ * on @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal.
+ *
+ * @param[in] value New signal value.
+ */
+void HSA_API
+ hsa_signal_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_store_relaxed
+ */
+void HSA_API
+ hsa_signal_store_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically set the value of a signal and return its previous value.
+ *
+ * @details If the value of the signal is changed, all the agents waiting
+ * on @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value New value.
+ *
+ * @return Value of the signal prior to the exchange.
+ *
+ */
+hsa_signal_value_t HSA_API
+ hsa_signal_exchange_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_exchange_acq_rel
+ */
+hsa_signal_value_t HSA_API
+ hsa_signal_exchange_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_exchange_acq_rel
+ */
+hsa_signal_value_t HSA_API
+ hsa_signal_exchange_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_exchange_acq_rel
+ */
+hsa_signal_value_t HSA_API
+ hsa_signal_exchange_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically set the value of a signal if the observed value is equal to
+ * the expected value. The observed value is returned regardless of whether the
+ * replacement was done.
+ *
+ * @details If the value of the signal is changed, all the agents waiting
+ * on @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue
+ * doorbell signal, the behavior is undefined.
+ *
+ * @param[in] expected Value to compare with.
+ *
+ * @param[in] value New value.
+ *
+ * @return Observed value of the signal.
+ *
+ */
+hsa_signal_value_t HSA_API hsa_signal_cas_acq_rel(hsa_signal_t signal,
+ hsa_signal_value_t expected,
+ hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_cas_acq_rel
+ */
+hsa_signal_value_t HSA_API hsa_signal_cas_acquire(hsa_signal_t signal,
+ hsa_signal_value_t expected,
+ hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_cas_acq_rel
+ */
+hsa_signal_value_t HSA_API hsa_signal_cas_relaxed(hsa_signal_t signal,
+ hsa_signal_value_t expected,
+ hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_cas_acq_rel
+ */
+hsa_signal_value_t HSA_API hsa_signal_cas_release(hsa_signal_t signal,
+ hsa_signal_value_t expected,
+ hsa_signal_value_t value);
+
+/**
+ * @brief Atomically increment the value of a signal by a given amount.
+ *
+ * @details If the value of the signal is changed, all the agents waiting on
+ * @p signal whose wait condition the resulting value satisfies are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value Value to add to the value of the signal.
+ *
+ */
+void HSA_API
+ hsa_signal_add_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_add_acq_rel
+ */
+void HSA_API
+ hsa_signal_add_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_add_acq_rel
+ */
+void HSA_API
+ hsa_signal_add_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_add_acq_rel
+ */
+void HSA_API
+ hsa_signal_add_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically decrement the value of a signal by a given amount.
+ *
+ * @details If the value of the signal is changed, all the agents waiting on
+ * @p signal whose wait condition the resulting value satisfies are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value Value to subtract from the value of the signal.
+ *
+ */
+void HSA_API
+ hsa_signal_subtract_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_subtract_acq_rel
+ */
+void HSA_API
+ hsa_signal_subtract_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_subtract_acq_rel
+ */
+void HSA_API
+ hsa_signal_subtract_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_subtract_acq_rel
+ */
+void HSA_API
+ hsa_signal_subtract_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically perform a bitwise AND operation between the value of a
+ * signal and a given value.
+ *
+ * @details If the value of the signal is changed, all the agents waiting on
+ * @p signal whose wait condition the resulting value satisfies are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value Value to AND with the value of the signal.
+ *
+ */
+void HSA_API
+ hsa_signal_and_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_and_acq_rel
+ */
+void HSA_API
+ hsa_signal_and_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_and_acq_rel
+ */
+void HSA_API
+ hsa_signal_and_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_and_acq_rel
+ */
+void HSA_API
+ hsa_signal_and_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically perform a bitwise OR operation between the value of a
+ * signal and a given value.
+ *
+ * @details If the value of the signal is changed, all the agents waiting on
+ * @p signal whose wait condition the resulting value satisfies are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value Value to OR with the value of the signal.
+ */
+void HSA_API
+ hsa_signal_or_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_or_acq_rel
+ */
+void HSA_API
+ hsa_signal_or_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_or_acq_rel
+ */
+void HSA_API
+ hsa_signal_or_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_or_acq_rel
+ */
+void HSA_API
+ hsa_signal_or_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically perform a bitwise XOR operation between the value of a
+ * signal and a given value.
+ *
+ * @details If the value of the signal is changed, all the agents waiting on
+ * @p signal whose wait condition the resulting value satisfies are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value Value to XOR with the value of the signal.
+ *
+ */
+void HSA_API
+ hsa_signal_xor_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_xor_acq_rel
+ */
+void HSA_API
+ hsa_signal_xor_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_xor_acq_rel
+ */
+void HSA_API
+ hsa_signal_xor_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_xor_acq_rel
+ */
+void HSA_API
+ hsa_signal_xor_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Wait condition operator.
+ */
+typedef enum {
+ /**
+ * The two operands are equal.
+ */
+ HSA_SIGNAL_CONDITION_EQ = 0,
+ /**
+ * The two operands are not equal.
+ */
+ HSA_SIGNAL_CONDITION_NE = 1,
+ /**
+ * The first operand is less than the second operand.
+ */
+ HSA_SIGNAL_CONDITION_LT = 2,
+ /**
+ * The first operand is greater than or equal to the second operand.
+ */
+ HSA_SIGNAL_CONDITION_GTE = 3
+} hsa_signal_condition_t;
+
+/**
+ * @brief State of the application thread during a signal wait.
+ */
+typedef enum {
+ /**
+ * The application thread may be rescheduled while waiting on the signal.
+ */
+ HSA_WAIT_STATE_BLOCKED = 0,
+ /**
+ * The application thread stays active while waiting on a signal.
+ */
+ HSA_WAIT_STATE_ACTIVE = 1
+} hsa_wait_state_t;
+
+/**
+ * @brief Wait until a signal value satisfies a specified condition, or a
+ * certain amount of time has elapsed.
+ *
+ * @details A wait operation can spuriously resume at any time sooner than the
+ * timeout (for example, due to system or other external factors) even when the
+ * condition has not been met.
+ *
+ * The function is guaranteed to return if the signal value satisfies the
+ * condition at some point in time during the wait, but the value returned to
+ * the application might not satisfy the condition. The application must ensure
+ * that signals are used in such a way that wait wakeup conditions are not
+ * invalidated before dependent threads have woken up.
+ *
+ * When the wait operation internally loads the value of the passed signal, it
+ * uses the memory order indicated in the function name.
+ *
+ * @param[in] signal Signal.
+ *
+ * @param[in] condition Condition used to compare the signal value with @p
+ * compare_value.
+ *
+ * @param[in] compare_value Value to compare with.
+ *
+ * @param[in] timeout_hint Maximum duration of the wait. Specified in the same
+ * unit as the system timestamp. The operation might block for a shorter or
+ * longer time even if the condition is not met. A value of UINT64_MAX indicates
+ * no maximum.
+ *
+ * @param[in] wait_state_hint Hint used by the application to indicate the
+ * preferred waiting state. The actual waiting state is ultimately decided by
+ * the HSA runtime and may not match the provided hint. A value of
+ * ::HSA_WAIT_STATE_ACTIVE may improve the latency of response to a signal
+ * update by avoiding rescheduling overhead.
+ *
+ * @return Observed value of the signal, which might not satisfy the specified
+ * condition.
+ *
+ */
+hsa_signal_value_t HSA_API
+ hsa_signal_wait_acquire(hsa_signal_t signal,
+ hsa_signal_condition_t condition,
+ hsa_signal_value_t compare_value,
+ uint64_t timeout_hint,
+ hsa_wait_state_t wait_state_hint);
+
+/**
+ * @copydoc hsa_signal_wait_acquire
+ */
+hsa_signal_value_t HSA_API
+ hsa_signal_wait_relaxed(hsa_signal_t signal,
+ hsa_signal_condition_t condition,
+ hsa_signal_value_t compare_value,
+ uint64_t timeout_hint,
+ hsa_wait_state_t wait_state_hint);
+
+/** @} */
+
+/** \defgroup memory Memory
+ * @{
+ */
+
+/**
+ * @brief A memory region represents a block of virtual memory with certain
+ * properties. For example, the HSA runtime represents fine-grained memory in
+ * the global segment using a region. A region might be associated with more
+ * than one agent.
+ */
+typedef struct hsa_region_s {
+ /**
+ * Opaque handle.
+ */
+ uint64_t handle;
+} hsa_region_t;
+
+/** @} */
+
+/** \defgroup queue Queues
+ * @{
+ */
+
+/**
+ * @brief Queue type. Intended to be used for dynamic queue protocol
+ * determination.
+ */
+typedef enum {
+ /**
+ * Queue supports multiple producers.
+ */
+ HSA_QUEUE_TYPE_MULTI = 0,
+ /**
+ * Queue only supports a single producer.
+ */
+ HSA_QUEUE_TYPE_SINGLE = 1
+} hsa_queue_type_t;
+
+/**
+ * @brief Queue features.
+ */
+typedef enum {
+ /**
+ * Queue supports kernel dispatch packets.
+ */
+ HSA_QUEUE_FEATURE_KERNEL_DISPATCH = 1,
+
+ /**
+ * Queue supports agent dispatch packets.
+ */
+ HSA_QUEUE_FEATURE_AGENT_DISPATCH = 2
+} hsa_queue_feature_t;
+
+/**
+ * @brief User mode queue.
+ *
+ * @details The queue structure is read-only and allocated by the HSA runtime,
+ * but agents can directly modify the contents of the buffer pointed to by @a
+ * base_address, or use HSA runtime APIs to access the doorbell signal.
+ *
+ */
+typedef struct hsa_queue_s {
+ /**
+ * Queue type.
+ */
+ hsa_queue_type_t type;
+
+ /**
+ * Queue features mask. This is a bit-field of ::hsa_queue_feature_t
+ * values. Applications should ignore any unknown set bits.
+ */
+ uint32_t features;
+
+#ifdef HSA_LARGE_MODEL
+ void *base_address; /* Packet buffer address; described in the next branch. */
+#elif defined HSA_LITTLE_ENDIAN
+ /**
+ * Starting address of the HSA runtime-allocated buffer used to store the AQL
+ * packets. Must be aligned to the size of an AQL packet.
+ */
+ void *base_address;
+ /**
+ * Reserved. Must be 0.
+ */
+ uint32_t reserved0;
+#else /* Neither HSA_LARGE_MODEL nor HSA_LITTLE_ENDIAN: reserved word first. */
+ uint32_t reserved0;
+ void *base_address;
+#endif
+
+ /**
+ * Signal object used by the application to indicate the ID of a packet that
+ * is ready to be processed. The HSA runtime manages the doorbell signal. If
+ * the application tries to replace or destroy this signal, the behavior is
+ * undefined.
+ *
+ * If @a type is ::HSA_QUEUE_TYPE_SINGLE the doorbell signal value must be
+ * updated in a monotonically increasing fashion. If @a type is
+ * ::HSA_QUEUE_TYPE_MULTI, the doorbell signal value can be updated with any
+ * value.
+ */
+ hsa_signal_t doorbell_signal;
+
+ /**
+ * Maximum number of packets the queue can hold. Must be a power of 2.
+ */
+ uint32_t size;
+ /**
+ * Reserved. Must be 0.
+ */
+ uint32_t reserved1;
+ /**
+ * Queue identifier, which is unique over the lifetime of the application.
+ */
+ uint64_t id;
+
+} hsa_queue_t;
+
+/**
+ * @brief Create a user mode queue.
+ *
+ * @details The HSA runtime creates the queue structure, the underlying packet
+ * buffer, the completion signal, and the write and read indexes. The initial
+ * value of the write and read indexes is 0. The type of every packet in the
+ * buffer is initialized to ::HSA_PACKET_TYPE_INVALID.
+ *
+ * The application should only rely on the error code returned to determine if
+ * the queue is valid.
+ *
+ * @param[in] agent Agent where to create the queue.
+ *
+ * @param[in] size Number of packets the queue is expected to
+ * hold. Must be a power of 2 between 1 and the value of
+ * ::HSA_AGENT_INFO_QUEUE_MAX_SIZE in @p agent. The size of the newly
+ * created queue is the maximum of @p size and the value of
+ * ::HSA_AGENT_INFO_QUEUE_MIN_SIZE in @p agent.
+ *
+ * @param[in] type Type of the queue. If the value of
+ * ::HSA_AGENT_INFO_QUEUE_TYPE in @p agent is ::HSA_QUEUE_TYPE_SINGLE, then @p
+ * type must also be ::HSA_QUEUE_TYPE_SINGLE.
+ *
+ * @param[in] callback Callback invoked by the HSA runtime for every
+ * asynchronous event related to the newly created queue. May be NULL. The HSA
+ * runtime passes three arguments to the callback: a code identifying the event
+ * that triggered the invocation, a pointer to the queue where the event
+ * originated, and the application data.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * invocation. May be NULL.
+ *
+ * @param[in] private_segment_size Hint indicating the maximum
+ * expected private segment usage per work-item, in bytes. There may
+ * be performance degradation if the application places a kernel
+ * dispatch packet in the queue and the corresponding private segment
+ * usage exceeds @p private_segment_size. If the application does not
+ * want to specify any particular value for this argument, @p
+ * private_segment_size must be UINT32_MAX. If the queue does not
+ * support kernel dispatch packets, this argument is ignored.
+ *
+ * @param[in] group_segment_size Hint indicating the maximum expected
+ * group segment usage per work-group, in bytes. There may be
+ * performance degradation if the application places a kernel dispatch
+ * packet in the queue and the corresponding group segment usage
+ * exceeds @p group_segment_size. If the application does not want to
+ * specify any particular value for this argument, @p
+ * group_segment_size must be UINT32_MAX. If the queue does not
+ * support kernel dispatch packets, this argument is ignored.
+ *
+ * @param[out] queue Memory location where the HSA runtime stores a pointer to
+ * the newly created queue.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * the resources required by the implementation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE_CREATION @p agent does not
+ * support queues of the given type.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two,
+ * @p size is 0, @p type is an invalid queue type, or @p queue is NULL.
+ *
+ */
+hsa_status_t HSA_API
+ hsa_queue_create(hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
+ void (*callback)(hsa_status_t status, hsa_queue_t *source,
+ void *data),
+ void *data, uint32_t private_segment_size,
+ uint32_t group_segment_size, hsa_queue_t **queue);
+
+/**
+ * @brief Create a queue for which the application or a kernel is responsible
+ * for processing the AQL packets.
+ *
+ * @details The application can use this function to create queues where AQL
+ * packets are not parsed by the packet processor associated with an agent,
+ * but rather by a unit of execution running on that agent (for example, a
+ * thread in the host application).
+ *
+ * The application is responsible for ensuring that all the producers and
+ * consumers of the resulting queue can access the provided doorbell signal
+ * and memory region. The application is also responsible for ensuring that the
+ * unit of execution processing the queue packets supports the indicated
+ * features (AQL packet types).
+ *
+ * When the queue is created, the HSA runtime allocates the packet buffer using
+ * @p region, and the write and read indexes. The initial value of the write and
+ * read indexes is 0, and the type of every packet in the buffer is initialized
+ * to ::HSA_PACKET_TYPE_INVALID. The value of the @e size, @e type, @e features,
+ * and @e doorbell_signal fields in the returned queue match the values passed
+ * by the application.
+ *
+ * @param[in] region Memory region that the HSA runtime should use to allocate
+ * the AQL packet buffer and any other queue metadata.
+ *
+ * @param[in] size Number of packets the queue is expected to hold. Must be a
+ * power of 2 greater than 0.
+ *
+ * @param[in] type Queue type.
+ *
+ * @param[in] features Supported queue features. This is a bit-field of
+ * ::hsa_queue_feature_t values.
+ *
+ * @param[in] doorbell_signal Doorbell signal that the HSA runtime must
+ * associate with the returned queue. The signal handle must not be 0.
+ *
+ * @param[out] queue Memory location where the HSA runtime stores a pointer to
+ * the newly created queue. The application should not rely on the value
+ * returned for this argument but only on the status code to determine if the
+ * queue is valid. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * the resources required by the implementation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two, @p
+ * size is 0, @p type is an invalid queue type, the doorbell signal handle is
+ * 0, or @p queue is NULL.
+ *
+ */
+hsa_status_t HSA_API
+ hsa_soft_queue_create(hsa_region_t region, uint32_t size,
+ hsa_queue_type_t type, uint32_t features,
+ hsa_signal_t doorbell_signal, hsa_queue_t **queue);
+
+/**
+ * @brief Destroy a user mode queue.
+ *
+ * @details When a queue is destroyed, the state of the AQL packets that have
+ * not yet been fully processed (their completion phase has not finished)
+ * becomes undefined. It is the responsibility of the application to ensure that
+ * all pending queue operations are finished if their results are required.
+ *
+ * The resources allocated by the HSA runtime during queue creation (queue
+ * structure, ring buffer, doorbell signal) are released. The queue should not
+ * be accessed after being destroyed.
+ *
+ * @param[in] queue Pointer to a queue created using ::hsa_queue_create.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
+ */
+hsa_status_t HSA_API hsa_queue_destroy(hsa_queue_t *queue);
+
+/**
+ * @brief Inactivate a queue.
+ *
+ * @details Inactivating the queue aborts any pending executions and prevents
+ * any new packets from being processed. Any more packets written to the queue
+ * once it is inactivated will be ignored by the packet processor.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
+ */
+hsa_status_t HSA_API hsa_queue_inactivate(hsa_queue_t *queue);
+
+/**
+ * @brief Atomically load the read index of a queue.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @return Read index of the queue pointed to by @p queue.
+ */
+uint64_t HSA_API hsa_queue_load_read_index_acquire(const hsa_queue_t *queue);
+
+/**
+ * @copydoc hsa_queue_load_read_index_acquire
+ */
+uint64_t HSA_API hsa_queue_load_read_index_relaxed(const hsa_queue_t *queue);
+
+/**
+ * @brief Atomically load the write index of a queue.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @return Write index of the queue pointed to by @p queue.
+ */
+uint64_t HSA_API hsa_queue_load_write_index_acquire(const hsa_queue_t *queue);
+
+/**
+ * @copydoc hsa_queue_load_write_index_acquire
+ */
+uint64_t HSA_API hsa_queue_load_write_index_relaxed(const hsa_queue_t *queue);
+
+/**
+ * @brief Atomically set the write index of a queue.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @param[in] value Value to assign to the write index.
+ *
+ */
+void HSA_API hsa_queue_store_write_index_relaxed(const hsa_queue_t *queue,
+ uint64_t value);
+
+/**
+ * @copydoc hsa_queue_store_write_index_relaxed
+ */
+void HSA_API hsa_queue_store_write_index_release(const hsa_queue_t *queue,
+ uint64_t value);
+
+/**
+ * @brief Atomically set the write index of a queue if the observed value is
+ * equal to the expected value. The application can inspect the returned value
+ * to determine if the replacement was done.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @param[in] expected Expected value.
+ *
+ * @param[in] value Value to assign to the write index if @p expected matches
+ * the observed write index. Must be greater than @p expected.
+ *
+ * @return Previous value of the write index.
+ */
+uint64_t HSA_API hsa_queue_cas_write_index_acq_rel(const hsa_queue_t *queue,
+ uint64_t expected,
+ uint64_t value);
+
+/**
+ * @copydoc hsa_queue_cas_write_index_acq_rel
+ */
+uint64_t HSA_API hsa_queue_cas_write_index_acquire(const hsa_queue_t *queue,
+ uint64_t expected,
+ uint64_t value);
+
+/**
+ * @copydoc hsa_queue_cas_write_index_acq_rel
+ */
+uint64_t HSA_API hsa_queue_cas_write_index_relaxed(const hsa_queue_t *queue,
+ uint64_t expected,
+ uint64_t value);
+
+/**
+ * @copydoc hsa_queue_cas_write_index_acq_rel
+ */
+uint64_t HSA_API hsa_queue_cas_write_index_release(const hsa_queue_t *queue,
+ uint64_t expected,
+ uint64_t value);
+
+/**
+ * @brief Atomically increment the write index of a queue by an offset.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @param[in] value Value to add to the write index.
+ *
+ * @return Previous value of the write index.
+ */
+uint64_t HSA_API
+ hsa_queue_add_write_index_acq_rel(const hsa_queue_t *queue, uint64_t value);
+
+/**
+ * @copydoc hsa_queue_add_write_index_acq_rel
+ */
+uint64_t HSA_API
+ hsa_queue_add_write_index_acquire(const hsa_queue_t *queue, uint64_t value);
+
+/**
+ * @copydoc hsa_queue_add_write_index_acq_rel
+ */
+uint64_t HSA_API
+ hsa_queue_add_write_index_relaxed(const hsa_queue_t *queue, uint64_t value);
+
+/**
+ * @copydoc hsa_queue_add_write_index_acq_rel
+ */
+uint64_t HSA_API
+ hsa_queue_add_write_index_release(const hsa_queue_t *queue, uint64_t value);
+
+/**
+ * @brief Atomically set the read index of a queue.
+ *
+ * @details Modifications of the read index are not allowed and result in
+ * undefined behavior if the queue is associated with an agent for which
+ * only the corresponding packet processor is permitted to update the read
+ * index.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @param[in] value Value to assign to the read index.
+ *
+ */
+void HSA_API hsa_queue_store_read_index_relaxed(const hsa_queue_t *queue,
+ uint64_t value);
+
+/**
+ * @copydoc hsa_queue_store_read_index_relaxed
+ */
+void HSA_API hsa_queue_store_read_index_release(const hsa_queue_t *queue,
+ uint64_t value);
+/** @} */
+
+/** \defgroup aql Architected Queuing Language
+ * @{
+ */
+
+/**
+ * @brief Packet type.
+ */
+typedef enum {
+ /**
+ * Vendor-specific packet.
+ */
+ HSA_PACKET_TYPE_VENDOR_SPECIFIC = 0,
+ /**
+ * The packet has been processed in the past, but has not been reassigned to
+ * the packet processor. A packet processor must not process a packet of this
+ * type. All queues support this packet type.
+ */
+ HSA_PACKET_TYPE_INVALID = 1,
+ /**
+ * Packet used by agents for dispatching jobs to kernel agents. Not all
+ * queues support packets of this type (see ::hsa_queue_feature_t).
+ */
+ HSA_PACKET_TYPE_KERNEL_DISPATCH = 2,
+ /**
+ * Packet used by agents to delay processing of subsequent packets, and to
+ * express complex dependencies between multiple packets. All queues support
+ * this packet type.
+ */
+ HSA_PACKET_TYPE_BARRIER_AND = 3,
+ /**
+ * Packet used by agents for dispatching jobs to agents. Not all
+ * queues support packets of this type (see ::hsa_queue_feature_t).
+ */
+ HSA_PACKET_TYPE_AGENT_DISPATCH = 4,
+ /**
+ * Packet used by agents to delay processing of subsequent packets, and to
+ * express complex dependencies between multiple packets. All queues support
+ * this packet type.
+ */
+ HSA_PACKET_TYPE_BARRIER_OR = 5,
+ HSA_PACKET_TYPE_COUNT = 6
+} hsa_packet_type_t;
+
+/**
+ * @brief Scope of the memory fence operation associated with a packet.
+ */
+typedef enum {
+ /**
+ * No scope (no fence is applied). The packet relies on external fences to
+ * ensure visibility of memory updates.
+ */
+ HSA_FENCE_SCOPE_NONE = 0,
+ /**
+ * The fence is applied with agent scope for the global segment.
+ */
+ HSA_FENCE_SCOPE_AGENT = 1,
+ /**
+ * The fence is applied across both agent and system scope for the global
+ * segment.
+ */
+ HSA_FENCE_SCOPE_SYSTEM = 2
+} hsa_fence_scope_t;
+
+/**
+ * @brief Sub-fields of the @a header field that is present in any AQL
+ * packet. The offset (with respect to the address of @a header) of a sub-field
+ * is identical to its enumeration constant. The width of each sub-field is
+ * determined by the corresponding value in ::hsa_packet_header_width_t. The
+ * offset and the width are expressed in bits.
+ */
+typedef enum {
+ /**
+ * Packet type. The value of this sub-field must be one of
+ * ::hsa_packet_type_t. If the type is ::HSA_PACKET_TYPE_VENDOR_SPECIFIC, the
+ * packet layout is vendor-specific.
+ */
+ HSA_PACKET_HEADER_TYPE = 0,
+ /**
+ * Barrier bit. If the barrier bit is set, the processing of the current
+ * packet only launches when all preceding packets (within the same queue) are
+ * complete.
+ */
+ HSA_PACKET_HEADER_BARRIER = 8,
+ /**
+ * Acquire fence scope. The value of this sub-field determines the scope and
+ * type of the memory fence operation applied before the packet enters the
+ * active phase. An acquire fence ensures that any subsequent global segment
+ * or image loads by any unit of execution that belongs to a dispatch that has
+ * not yet entered the active phase on any queue of the same kernel agent,
+ * see any data previously released at the scopes specified by the acquire
+ * fence. The value of this sub-field must be one of ::hsa_fence_scope_t.
+ */
+ HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE = 9,
+ /**
+ * Release fence scope. The value of this sub-field determines the scope and
+ * type of the memory fence operation applied after kernel completion but
+ * before the packet is completed. A release fence makes any global segment or
+ * image data that was stored by any unit of execution that belonged to a
+ * dispatch that has completed the active phase on any queue of the same
+ * kernel agent visible in all the scopes specified by the release fence. The
+ * value of this sub-field must be one of ::hsa_fence_scope_t.
+ */
+ HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE = 11
+} hsa_packet_header_t;
+
+/**
+ * @brief Width (in bits) of the sub-fields in ::hsa_packet_header_t.
+ */
+typedef enum {
+ HSA_PACKET_HEADER_WIDTH_TYPE = 8,
+ HSA_PACKET_HEADER_WIDTH_BARRIER = 1,
+ HSA_PACKET_HEADER_WIDTH_ACQUIRE_FENCE_SCOPE = 2,
+ HSA_PACKET_HEADER_WIDTH_RELEASE_FENCE_SCOPE = 2
+} hsa_packet_header_width_t;
+
+/**
+ * @brief Sub-fields of the kernel dispatch packet @a setup field. The offset
+ * (with respect to the address of @a setup) of a sub-field is identical to its
+ * enumeration constant. The width of each sub-field is determined by the
+ * corresponding value in ::hsa_kernel_dispatch_packet_setup_width_t. The
+ * offset and the width are expressed in bits.
+ */
+typedef enum {
+ /**
+ * Number of dimensions of the grid. Valid values are 1, 2, or 3.
+ *
+ */
+ HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS = 0
+} hsa_kernel_dispatch_packet_setup_t;
+
+/**
+ * @brief Width (in bits) of the sub-fields in
+ * ::hsa_kernel_dispatch_packet_setup_t.
+ */
+typedef enum {
+ HSA_KERNEL_DISPATCH_PACKET_SETUP_WIDTH_DIMENSIONS = 2
+} hsa_kernel_dispatch_packet_setup_width_t;
+
+/**
+ * @brief AQL kernel dispatch packet
+ */
+typedef struct hsa_kernel_dispatch_packet_s {
+ /**
+ * Packet header. Used to configure multiple packet parameters such as the
+ * packet type. The parameters are described by ::hsa_packet_header_t.
+ */
+ uint16_t header;
+
+ /**
+ * Dispatch setup parameters. Used to configure kernel dispatch parameters
+ * such as the number of dimensions in the grid. The parameters are described
+ * by ::hsa_kernel_dispatch_packet_setup_t.
+ */
+ uint16_t setup;
+
+ /**
+ * X dimension of work-group, in work-items. Must be greater than 0.
+ */
+ uint16_t workgroup_size_x;
+
+ /**
+ * Y dimension of work-group, in work-items. Must be greater than
+ * 0. If the grid has 1 dimension, the only valid value is 1.
+ */
+ uint16_t workgroup_size_y;
+
+ /**
+ * Z dimension of work-group, in work-items. Must be greater than
+ * 0. If the grid has 1 or 2 dimensions, the only valid value is 1.
+ */
+ uint16_t workgroup_size_z;
+
+ /**
+ * Reserved. Must be 0.
+ */
+ uint16_t reserved0;
+
+ /**
+ * X dimension of grid, in work-items. Must be greater than 0. Must
+ * not be smaller than @a workgroup_size_x.
+ */
+ uint32_t grid_size_x;
+
+ /**
+ * Y dimension of grid, in work-items. Must be greater than 0. If the grid has
+ * 1 dimension, the only valid value is 1. Must not be smaller than @a
+ * workgroup_size_y.
+ */
+ uint32_t grid_size_y;
+
+ /**
+ * Z dimension of grid, in work-items. Must be greater than 0. If the grid has
+ * 1 or 2 dimensions, the only valid value is 1. Must not be smaller than @a
+ * workgroup_size_z.
+ */
+ uint32_t grid_size_z;
+
+ /**
+ * Size in bytes of private memory allocation request (per work-item).
+ */
+ uint32_t private_segment_size;
+
+ /**
+ * Size in bytes of group memory allocation request (per work-group). Must not
+ * be less than the sum of the group memory used by the kernel (and the
+ * functions it calls directly or indirectly) and the dynamically allocated
+ * group segment variables.
+ */
+ uint32_t group_segment_size;
+
+ /**
+ * Opaque handle to a code object that includes an implementation-defined
+ * executable code for the kernel.
+ */
+ uint64_t kernel_object;
+
+#ifdef HSA_LARGE_MODEL
+ void *kernarg_address;
+#elif defined HSA_LITTLE_ENDIAN
+ /**
+ * Pointer to a buffer containing the kernel arguments. May be NULL.
+ *
+ * The buffer must be allocated using ::hsa_memory_allocate, and must not be
+ * modified once the kernel dispatch packet is enqueued until the dispatch has
+ * completed execution.
+ */
+ void *kernarg_address;
+ /**
+ * Reserved. Must be 0.
+ */
+ uint32_t reserved1;
+#else
+ uint32_t reserved1;
+ void *kernarg_address;
+#endif
+
+ /**
+ * Reserved. Must be 0.
+ */
+ uint64_t reserved2;
+
+ /**
+ * Signal used to indicate completion of the job. The application can use the
+ * special signal handle 0 to indicate that no signal is used.
+ */
+ hsa_signal_t completion_signal;
+
+} hsa_kernel_dispatch_packet_t;
+
+/**
+ * @brief Agent dispatch packet.
+ */
+typedef struct hsa_agent_dispatch_packet_s {
+ /**
+ * Packet header. Used to configure multiple packet parameters such as the
+ * packet type. The parameters are described by ::hsa_packet_header_t.
+ */
+ uint16_t header;
+
+ /**
+ * Application-defined function to be performed by the destination agent.
+ */
+ uint16_t type;
+
+ /**
+ * Reserved. Must be 0.
+ */
+ uint32_t reserved0;
+
+#ifdef HSA_LARGE_MODEL
+ void *return_address;
+#elif defined HSA_LITTLE_ENDIAN
+ /**
+ * Address where to store the function return values, if any.
+ */
+ void *return_address;
+ /**
+ * Reserved. Must be 0.
+ */
+ uint32_t reserved1;
+#else
+ uint32_t reserved1;
+ void *return_address;
+#endif
+
+ /**
+ * Function arguments.
+ */
+ uint64_t arg[4];
+
+ /**
+ * Reserved. Must be 0.
+ */
+ uint64_t reserved2;
+
+ /**
+ * Signal used to indicate completion of the job. The application can use the
+ * special signal handle 0 to indicate that no signal is used.
+ */
+ hsa_signal_t completion_signal;
+
+} hsa_agent_dispatch_packet_t;
+
+/**
+ * @brief Barrier-AND packet.
+ */
+typedef struct hsa_barrier_and_packet_s {
+ /**
+ * Packet header. Used to configure multiple packet parameters such as the
+ * packet type. The parameters are described by ::hsa_packet_header_t.
+ */
+ uint16_t header;
+
+ /**
+ * Reserved. Must be 0.
+ */
+ uint16_t reserved0;
+
+ /**
+ * Reserved. Must be 0.
+ */
+ uint32_t reserved1;
+
+ /**
+ * Array of dependent signal objects. Signals with a handle value of 0 are
+ * allowed and are interpreted by the packet processor as satisfied
+ * dependencies.
+ */
+ hsa_signal_t dep_signal[5];
+
+ /**
+ * Reserved. Must be 0.
+ */
+ uint64_t reserved2;
+
+ /**
+ * Signal used to indicate completion of the job. The application can use the
+ * special signal handle 0 to indicate that no signal is used.
+ */
+ hsa_signal_t completion_signal;
+
+} hsa_barrier_and_packet_t;
+
+/**
+ * @brief Barrier-OR packet.
+ */
+typedef struct hsa_barrier_or_packet_s {
+ /**
+ * Packet header. Used to configure multiple packet parameters such as the
+ * packet type. The parameters are described by ::hsa_packet_header_t.
+ */
+ uint16_t header;
+
+ /**
+ * Reserved. Must be 0.
+ */
+ uint16_t reserved0;
+
+ /**
+ * Reserved. Must be 0.
+ */
+ uint32_t reserved1;
+
+ /**
+ * Array of dependent signal objects. Signals with a handle value of 0 are
+ * allowed and are interpreted by the packet processor as dependencies not
+ * satisfied.
+ */
+ hsa_signal_t dep_signal[5];
+
+ /**
+ * Reserved. Must be 0.
+ */
+ uint64_t reserved2;
+
+ /**
+ * Signal used to indicate completion of the job. The application can use the
+ * special signal handle 0 to indicate that no signal is used.
+ */
+ hsa_signal_t completion_signal;
+
+} hsa_barrier_or_packet_t;
+
+/** @} */
+
+/** \addtogroup memory Memory
+ * @{
+ */
+
+/**
+ * @brief Memory segments associated with a region.
+ */
+typedef enum {
+ /**
+ * Global segment. Used to hold data that is shared by all agents.
+ */
+ HSA_REGION_SEGMENT_GLOBAL = 0,
+ /**
+ * Read-only segment. Used to hold data that remains constant during the
+ * execution of a kernel.
+ */
+ HSA_REGION_SEGMENT_READONLY = 1,
+ /**
+ * Private segment. Used to hold data that is local to a single work-item.
+ */
+ HSA_REGION_SEGMENT_PRIVATE = 2,
+ /**
+ * Group segment. Used to hold data that is shared by the work-items of a
+ * work-group.
+ */
+ HSA_REGION_SEGMENT_GROUP = 3,
+ /* TODO: non-standard enums */
+ HSA_REGION_SEGMENT_ARG = 4,
+ HSA_REGION_SEGMENT_KERNARG = 5,
+ HSA_REGION_SEGMENT_SPILL = 6
+} hsa_region_segment_t;
+
+/**
+ * @brief Global region flags.
+ */
+typedef enum {
+ /**
+ * The application can use memory in the region to store kernel arguments, and
+ * provide the values for the kernarg segment of a kernel dispatch. If this
+ * flag is set, then ::HSA_REGION_GLOBAL_FLAG_FINE_GRAINED must be set.
+ */
+ HSA_REGION_GLOBAL_FLAG_KERNARG = 1,
+ /**
+ * Updates to memory in this region are immediately visible to all the
+ * agents under the terms of the HSA memory model. If this
+ * flag is set, then ::HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED must not be set.
+ */
+ HSA_REGION_GLOBAL_FLAG_FINE_GRAINED = 2,
+ /**
+ * Updates to memory in this region can be performed by a single agent at
+ * a time. If a different agent in the system is allowed to access the
+ * region, the application must explicitly invoke ::hsa_memory_assign_agent
+ * in order to transfer ownership to that agent for a particular buffer.
+ */
+ HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED = 4
+} hsa_region_global_flag_t;
+
+/**
+ * @brief Attributes of a memory region.
+ */
+typedef enum {
+ /**
+ * Segment where memory in the region can be used. The type of this
+ * attribute is ::hsa_region_segment_t.
+ */
+ HSA_REGION_INFO_SEGMENT = 0,
+ /**
+ * Flag mask. The value of this attribute is undefined if the value of
+ * ::HSA_REGION_INFO_SEGMENT is not ::HSA_REGION_SEGMENT_GLOBAL. The type of
+ * this attribute is uint32_t, a bit-field of ::hsa_region_global_flag_t
+ * values.
+ */
+ HSA_REGION_INFO_GLOBAL_FLAGS = 1,
+ /**
+ * Size of this region, in bytes. The type of this attribute is size_t.
+ */
+ HSA_REGION_INFO_SIZE = 2,
+ /**
+ * Maximum allocation size in this region, in bytes. Must not exceed the value
+ * of ::HSA_REGION_INFO_SIZE. The type of this attribute is size_t.
+ *
+ * If the region is in the global or readonly segments, this is the maximum
+ * size that the application can pass to ::hsa_memory_allocate. If the region
+ * is in the group segment, this is the maximum size (per work-group) that can
+ * be requested for a given kernel dispatch. If the region is in the private
+ * segment, this is the maximum size (per work-item) that can be requested for
+ * a specific kernel dispatch.
+ */
+ HSA_REGION_INFO_ALLOC_MAX_SIZE = 4,
+ /**
+ * Indicates whether memory in this region can be allocated using
+ * ::hsa_memory_allocate. The type of this attribute is bool.
+ *
+ * The value of this flag is always false for regions in the group and private
+ * segments.
+ */
+ HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED = 5,
+ /**
+ * Allocation granularity of buffers allocated by ::hsa_memory_allocate in
+ * this region. The size of a buffer allocated in this region is a multiple of
+ * the value of this attribute. The value of this attribute is only defined if
+ * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region. The type
+ * of this attribute is size_t.
+ */
+ HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE = 6,
+ /**
+ * Alignment of buffers allocated by ::hsa_memory_allocate in this region. The
+ * value of this attribute is only defined if
+ * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region, and must
+ * be a power of 2. The type of this attribute is size_t.
+ */
+ HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT = 7,
+ HSA_REGION_INFO_COUNT = 8
+} hsa_region_info_t;
+
+/**
+ * @brief Get the current value of an attribute of a region.
+ *
+ * @param[in] region A valid region.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * region attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_region_get_info(hsa_region_t region,
+ hsa_region_info_t attribute,
+ void *value);
+
+/**
+ * @brief Iterate over the memory regions associated with a given agent, and
+ * invoke an application-defined callback on every iteration.
+ *
+ * @param[in] agent A valid agent.
+ *
+ * @param[in] callback Callback to be invoked once per region that is
+ * accessible from the agent. The HSA runtime passes two arguments to the
+ * callback, the region and the application data. If @p callback returns a
+ * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
+ * traversal stops and ::hsa_agent_iterate_regions returns that status value.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * iteration. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */
+hsa_status_t HSA_API hsa_agent_iterate_regions(
+ hsa_agent_t agent,
+ hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
+
+/**
+ * @brief Allocate a block of memory in a given region.
+ *
+ * @param[in] region Region where to allocate memory from. The region must have
+ * the ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED flag set.
+ *
+ * @param[in] size Allocation size, in bytes. Must not be zero. This value is
+ * rounded up to the nearest multiple of ::HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE
+ * in @p region.
+ *
+ * @param[out] ptr Pointer to the location where to store the base address of
+ * the allocated block. The returned base address is aligned to the value of
+ * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT in @p region. If the allocation
+ * fails, the returned value is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES No memory is available.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to
+ * allocate memory in @p region, or @p size is greater than the value of
+ * ::HSA_REGION_INFO_ALLOC_MAX_SIZE in @p region.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0.
+ */
+hsa_status_t HSA_API
+ hsa_memory_allocate(hsa_region_t region, size_t size, void **ptr);
+
+/**
+ * @brief Deallocate a block of memory previously allocated using
+ * ::hsa_memory_allocate.
+ *
+ * @param[in] ptr Pointer to a memory block. If @p ptr does not match a value
+ * previously returned by ::hsa_memory_allocate, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ */
+hsa_status_t HSA_API hsa_memory_free(void *ptr);
+
+/**
+ * @brief Copy a block of memory.
+ *
+ * @param[out] dst Buffer where the content is to be copied.
+ *
+ * @param[in] src A valid pointer to the source of data to be copied.
+ *
+ * @param[in] size Number of bytes to copy. If @p size is 0, no copy is
+ * performed and the function returns success. Copying a number of bytes larger
+ * than the size of the buffers pointed to by @p dst or @p src results in
+ * undefined behavior.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination
+ * pointers are NULL.
+ */
+hsa_status_t HSA_API hsa_memory_copy(void *dst, const void *src, size_t size);
+
+/**
+ * @brief Change the ownership of a global, coarse-grained buffer.
+ *
+ * @details The contents of a coarse-grained buffer are visible to an agent
+ * only after ownership has been explicitly transferred to that agent. Once the
+ * operation completes, the previous owner can no longer access the data in the
+ * buffer.
+ *
+ * An implementation of the HSA runtime is allowed, but not required, to change
+ * the physical location of the buffer when ownership is transferred to a
+ * different agent. In general the application must not assume this
+ * behavior. The virtual location (address) of the passed buffer is never
+ * modified.
+ *
+ * @param[in] ptr Base address of a global buffer. The pointer should match an
+ * address previously returned by ::hsa_memory_allocate. The size of the buffer
+ * affected by the ownership change is identical to the size of that previous
+ * allocation. If @p ptr points to a fine-grained global buffer, no operation is
+ * performed and the function returns success. If @p ptr does not point to
+ * global memory, the behavior is undefined.
+ *
+ * @param[in] agent Agent that becomes the owner of the buffer. The
+ * application is responsible for ensuring that @p agent has access to the
+ * region that contains the buffer. It is allowed to change ownership to an
+ * agent that is already the owner of the buffer, with the same or different
+ * access permissions.
+ *
+ * @param[in] access Access permissions requested for the new owner.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is unable to
+ * acquire the resources required by the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p access is
+ * not a valid access value.
+ */
+hsa_status_t HSA_API hsa_memory_assign_agent(void *ptr, hsa_agent_t agent,
+ hsa_access_permission_t access);
+
+/**
+ *
+ * @brief Register a global, fine-grained buffer.
+ *
+ * @details Registering a buffer serves as an indication to the HSA runtime that
+ * the memory might be accessed from a kernel agent other than the
+ * host. Registration is a performance hint that allows the HSA runtime
+ * implementation to know which buffers will be accessed by some of the kernel
+ * agents ahead of time.
+ *
+ * Registration is only recommended for buffers in the global segment that have
+ * not been allocated using the HSA allocator (::hsa_memory_allocate), but an OS
+ * allocator instead.
+ *
+ * Registrations should not overlap.
+ *
+ * @param[in] ptr A buffer in global memory. If a NULL pointer is passed, no
+ * operation is performed.
+ *
+ * @param[in] size Requested registration size in bytes. A size of 0 is
+ * only allowed if @p ptr is NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
+ * allocating the necessary resources.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 but @p ptr
+ * is not NULL.
+ */
+hsa_status_t HSA_API hsa_memory_register(void *ptr, size_t size);
+
+/**
+ *
+ * @brief Deregister memory previously registered using ::hsa_memory_register.
+ *
+ * @details If the interval being deregistered does not exactly match a previous
+ * registration (same start and end addresses), the behavior is undefined.
+ *
+ * @param[in] ptr A pointer to the base of the buffer to be deregistered. If
+ * a NULL pointer is passed, no operation is performed.
+ *
+ * @param[in] size Size of the buffer to be deregistered.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ */
+hsa_status_t HSA_API hsa_memory_deregister(void *ptr, size_t size);
+
+/** @} */
+
+/** \defgroup symbol-attributes Symbol Attributes
+ * @{
+ */
+
+/**
+ * @brief Kind of entity a symbol represents.
+ */
+typedef enum {
+ /**
+ * The symbol is a variable.
+ */
+ HSA_SYMBOL_KIND_VARIABLE = 0,
+ /**
+ * The symbol is a kernel.
+ */
+ HSA_SYMBOL_KIND_KERNEL = 1,
+ /**
+ * The symbol is an indirect function.
+ */
+ HSA_SYMBOL_KIND_INDIRECT_FUNCTION = 2
+} hsa_symbol_kind_t;
+
+/**
+ * @brief Allocation type of a variable.
+ */
+typedef enum {
+ /**
+ * The variable has agent allocation.
+ */
+ HSA_VARIABLE_ALLOCATION_AGENT = 0,
+ /**
+ * The variable has program allocation.
+ */
+ HSA_VARIABLE_ALLOCATION_PROGRAM = 1
+} hsa_variable_allocation_t;
+
+/**
+ * @brief Linkage type of a symbol.
+ */
+typedef enum {
+ /**
+ * The symbol has module linkage.
+ */
+ HSA_SYMBOL_LINKAGE_MODULE = 0,
+ /**
+ * The symbol has program linkage.
+ */
+ HSA_SYMBOL_LINKAGE_PROGRAM = 1
+} hsa_symbol_linkage_t;
+
+/**
+ * @brief Memory segment associated with a variable.
+ */
+typedef enum {
+ /**
+ * The variable is associated with the global memory segment.
+ */
+ HSA_VARIABLE_SEGMENT_GLOBAL = 0,
+ /**
+ * The variable is associated with the readonly memory segment.
+ */
+ HSA_VARIABLE_SEGMENT_READONLY = 1
+} hsa_variable_segment_t;
+
+/** @} */
+
+/** \defgroup code-object Code Object
+ * @{
+ */
+
+/**
+ * @brief Handle to an instruction set architecture (ISA).
+ */
+typedef struct hsa_isa_s {
+ /**
+ * Opaque handle.
+ */
+ uint64_t handle;
+} hsa_isa_t;
+
+/**
+ * @brief Retrieve the ISA handle that corresponds to a symbolic name.
+ *
+ * @param[in] name Vendor-specific name associated with a particular instruction
+ * set architecture. Must be a NUL-terminated string.
+ *
+ * @param[out] isa Memory location where the HSA runtime stores the ISA handle
+ * corresponding to the given name. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p name is NULL, or @p isa is
+ * NULL.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ISA_NAME The given name does not
+ * correspond to any instruction set architecture.
+ */
+hsa_status_t HSA_API hsa_isa_from_name(
+ const char* name,
+ hsa_isa_t* isa);
+
+/**
+ * @brief Instruction set architecture attributes.
+ */
+typedef enum {
+ /**
+ * The length of the ISA name. The type of this attribute is uint32_t.
+ */
+ HSA_ISA_INFO_NAME_LENGTH = 0,
+ /**
+ * Human-readable description. The type of this attribute is a character array
+ * whose length equals the value of the ::HSA_ISA_INFO_NAME_LENGTH attribute.
+ */
+ HSA_ISA_INFO_NAME = 1,
+ /**
+ * Number of call conventions supported by the instruction set architecture.
+ * The type of this attribute is uint32_t.
+ */
+ HSA_ISA_INFO_CALL_CONVENTION_COUNT = 2,
+ /**
+ * Number of work-items in a wavefront for a given call convention. Must be a
+ * power of 2 in the range [1,256]. The type of this attribute is uint32_t.
+ */
+ HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE = 3,
+ /**
+ * Number of wavefronts per compute unit for a given call convention. In
+ * practice, other factors (for example, the amount of group memory used by a
+ * work-group) may further limit the number of wavefronts per compute
+ * unit. The type of this attribute is uint32_t.
+ */
+ HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT = 4
+} hsa_isa_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given instruction set
+ * architecture (ISA).
+ *
+ * @param[in] isa A valid instruction set architecture.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[in] index Call convention index. Used only for call convention
+ * attributes, otherwise ignored. Must have a value between 0 (inclusive) and
+ * the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT (exclusive)
+ * in @p isa.
+ *
+ * @param[out] value Pointer to an application-allocated buffer in which to
+ * store the value of the attribute. Must not be NULL. If the buffer is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
+ * invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_INDEX @p index is out of range.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * instruction set architecture attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_isa_get_info(
+ hsa_isa_t isa,
+ hsa_isa_info_t attribute,
+ uint32_t index,
+ void* value);
+
+/**
+ * @brief Check if the instruction set architecture of a code object can be
+ * executed on an agent associated with another architecture.
+ *
+ * @param[in] code_object_isa Instruction set architecture associated with a
+ * code object.
+ *
+ * @param[in] agent_isa Instruction set architecture associated with an agent.
+ *
+ * @param[out] result Pointer to a memory location where the HSA runtime stores
+ * the result of the check: true if the two architectures are compatible, false
+ * if they are incompatible. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p code_object_isa or @p agent_isa is
+ * invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
+ */
+hsa_status_t HSA_API hsa_isa_compatible(
+ hsa_isa_t code_object_isa,
+ hsa_isa_t agent_isa,
+ bool* result);
+
+/**
+ * @brief Opaque handle to a code object. A code object contains ISA for
+ * finalized kernels and indirect functions, together with information about
+ * the global/readonly segment variables they reference.
+ */
+typedef struct hsa_code_object_s {
+ /**
+ * Opaque handle.
+ */
+ uint64_t handle;
+} hsa_code_object_t;
+
+/**
+ * @brief Opaque handle to application data that is passed to the serialization
+ * and deserialization functions.
+ */
+typedef struct hsa_callback_data_s {
+ /**
+ * Opaque handle to the application data.
+ */
+ uint64_t handle;
+} hsa_callback_data_t;
+
+/**
+ * @brief Serialize a code object. Can be used for offline finalization,
+ * install-time finalization, disk code caching, etc.
+ *
+ * @param[in] code_object Code object.
+ *
+ * @param[in] alloc_callback Callback function for memory allocation. Must not
+ * be NULL. The HSA runtime passes three arguments to the callback: the
+ * allocation size, the application data, and a pointer to a memory location
+ * where the application stores the allocation result. The HSA runtime invokes
+ * @p alloc_callback once to allocate a buffer that contains the serialized
+ * version of @p code_object. If the callback returns a status code other than
+ * ::HSA_STATUS_SUCCESS, this function returns the same code.
+ *
+ * @param[in] callback_data Application data that is passed to @p
+ * alloc_callback. May be NULL (NOTE(review): passed by value, not a pointer).
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @param[out] serialized_code_object Memory location where the HSA runtime
+ * stores a pointer to the serialized code object. Must not be NULL.
+ *
+ * @param[out] serialized_code_object_size Memory location where the HSA runtime
+ * stores the size (in bytes) of @p serialized_code_object. The returned value
+ * matches the allocation size passed by the HSA runtime to @p
+ * alloc_callback. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p alloc_callback, @p
+ * serialized_code_object, or @p serialized_code_object_size is NULL.
+ */
+hsa_status_t HSA_API hsa_code_object_serialize(
+ hsa_code_object_t code_object,
+ hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data, void **address),
+ hsa_callback_data_t callback_data,
+ const char *options,
+ void **serialized_code_object,
+ size_t *serialized_code_object_size);
+
+/**
+ * @brief Deserialize a code object.
+ *
+ * @param[in] serialized_code_object A serialized code object. Must not be NULL.
+ *
+ * @param[in] serialized_code_object_size The size (in bytes) of @p
+ * serialized_code_object. Must not be 0.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @param[out] code_object Memory location where the HSA runtime stores the
+ * deserialized code object. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p serialized_code_object or @p
+ * code_object is NULL, or @p serialized_code_object_size is 0.
+ */
+hsa_status_t HSA_API hsa_code_object_deserialize(
+ void *serialized_code_object,
+ size_t serialized_code_object_size,
+ const char *options,
+ hsa_code_object_t *code_object);
+
+/**
+ * @brief Destroy a code object.
+ *
+ * @details The lifetime of a code object must exceed that of any executable
+ * into which it has been loaded. If an executable that loaded @p code_object
+ * has not been destroyed, the behavior is undefined.
+ *
+ * @param[in] code_object Code object. The handle becomes invalid after it has
+ * been destroyed.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ */
+hsa_status_t HSA_API hsa_code_object_destroy(
+ hsa_code_object_t code_object);
+
+/**
+ * @brief Code object type.
+ */
+typedef enum {
+ /**
+ * Produces a code object that contains ISA for all kernels and indirect
+ * functions in the HSA source.
+ */
+ HSA_CODE_OBJECT_TYPE_PROGRAM = 0
+} hsa_code_object_type_t;
+
+/**
+ * @brief Code object attributes.
+ */
+typedef enum {
+ /**
+ * The version of the code object. The type of this attribute is a
+ * NUL-terminated char[64]. If the version of the code object uses fewer than
+ * 63 characters, the rest of the array must be filled with NULs.
+ */
+ HSA_CODE_OBJECT_INFO_VERSION = 0,
+ /**
+ * Type of code object. The type of this attribute is
+ * ::hsa_code_object_type_t.
+ */
+ HSA_CODE_OBJECT_INFO_TYPE = 1,
+ /**
+ * Instruction set architecture this code object is produced for. The type of
+ * this attribute is ::hsa_isa_t.
+ */
+ HSA_CODE_OBJECT_INFO_ISA = 2,
+ /**
+ * Machine model this code object is produced for. The type of this attribute
+ * is ::hsa_machine_model_t.
+ */
+ HSA_CODE_OBJECT_INFO_MACHINE_MODEL = 3,
+ /**
+ * Profile this code object is produced for. The type of this attribute is
+ * ::hsa_profile_t.
+ */
+ HSA_CODE_OBJECT_INFO_PROFILE = 4,
+ /**
+ * Default floating-point rounding mode used when the code object is
+ * produced. The type of this attribute is
+ * ::hsa_default_float_rounding_mode_t.
+ */
+ HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5
+} hsa_code_object_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given code object.
+ *
+ * @param[in] code_object Code object.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer in which to
+ * store the value of the attribute. Must not be NULL. If the buffer is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * code object attribute, or @p value is NULL.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ */
+hsa_status_t HSA_API hsa_code_object_get_info(
+ hsa_code_object_t code_object,
+ hsa_code_object_info_t attribute,
+ void *value);
+
+/**
+ * @brief Handle to a code object symbol.
+ */
+typedef struct hsa_code_symbol_s {
+ /**
+ * Opaque handle.
+ */
+ uint64_t handle;
+} hsa_code_symbol_t;
+
+/**
+ * @brief Get the symbol handle within a code object for a given symbol name.
+ *
+ * @param[in] code_object Code object.
+ *
+ * @param[in] symbol_name Symbol name. Must not be NULL.
+ *
+ * @param[out] symbol Memory location where the HSA runtime stores the symbol
+ * handle. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
+ * that matches @p symbol_name.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
+ * @p symbol is NULL.
+ */
+hsa_status_t HSA_API hsa_code_object_get_symbol(
+ hsa_code_object_t code_object,
+ const char *symbol_name,
+ hsa_code_symbol_t *symbol);
+
+/**
+ * @brief Code object symbol attributes.
+ */
+typedef enum {
+ /**
+ * The type of the symbol. The type of this attribute is ::hsa_symbol_kind_t.
+ */
+ HSA_CODE_SYMBOL_INFO_TYPE = 0,
+ /**
+ * The length of the symbol name. The type of this attribute is uint32_t.
+ */
+ HSA_CODE_SYMBOL_INFO_NAME_LENGTH = 1,
+ /**
+ * The name of the symbol. The type of this attribute is a character array
+ * whose length equals the value of the ::HSA_CODE_SYMBOL_INFO_NAME_LENGTH
+ * attribute.
+ */
+ HSA_CODE_SYMBOL_INFO_NAME = 2,
+ /**
+ * The length of the module name to which this symbol belongs if this symbol
+ * has module linkage, otherwise 0 is returned. The type of this attribute is
+ * uint32_t.
+ */
+ HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
+ /**
+ * The module name to which this symbol belongs if this symbol has module
+ * linkage, otherwise an empty string is returned. The type of this attribute
+ * is a character array whose length equals the value of the
+ * ::HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute.
+ */
+ HSA_CODE_SYMBOL_INFO_MODULE_NAME = 4,
+ /**
+ * The linkage kind of the symbol. The type of this attribute is
+ * ::hsa_symbol_linkage_t.
+ */
+ HSA_CODE_SYMBOL_INFO_LINKAGE = 5,
+ /**
+ * Indicates whether the symbol corresponds to a definition. The type of this
+ * attribute is bool. Note that its value (17) is not in declaration order.
+ */
+ HSA_CODE_SYMBOL_INFO_IS_DEFINITION = 17,
+ /**
+ * The allocation kind of the variable. The value of this attribute is
+ * undefined if the symbol is not a variable. The type of this attribute is
+ * ::hsa_variable_allocation_t.
+ */
+ HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
+ /**
+ * The segment kind of the variable. The value of this attribute is
+ * undefined if the symbol is not a variable. The type of this attribute is
+ * ::hsa_variable_segment_t.
+ */
+ HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
+ /**
+ * Alignment of the variable. The value of this attribute is undefined if the
+ * symbol is not a variable. The type of this attribute is uint32_t.
+ */
+ HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
+ /**
+ * Size of the variable. The value of this attribute is undefined if the
+ * symbol is not a variable. The type of this attribute is uint32_t.
+ *
+ * A size of 0 is returned if the variable is an external variable and has an
+ * unknown dimension.
+ */
+ HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE = 9,
+ /**
+ * Indicates whether the variable is constant. The value of this attribute is
+ * undefined if the symbol is not a variable. The type of this attribute is
+ * bool.
+ */
+ HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
+ /**
+ * Size of kernarg segment memory that is required to hold the values of the
+ * kernel arguments, in bytes. The value of this attribute is undefined if the
+ * symbol is not a kernel. The type of this attribute is uint32_t.
+ */
+ HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
+ /**
+ * Alignment (in bytes) of the buffer used to pass arguments to the kernel,
+ * which is the maximum of 16 and the maximum alignment of any of the kernel
+ * arguments. The value of this attribute is undefined if the symbol is not a
+ * kernel. The type of this attribute is uint32_t.
+ */
+ HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
+ /**
+ * Size of static group segment memory required by the kernel (per
+ * work-group), in bytes. The value of this attribute is undefined
+ * if the symbol is not a kernel. The type of this attribute is uint32_t.
+ *
+ * The reported amount does not include any dynamically allocated group
+ * segment memory that may be requested by the application when a kernel is
+ * dispatched.
+ */
+ HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
+ /**
+ * Size of static private, spill, and arg segment memory required by
+ * this kernel (per work-item), in bytes. The value of this attribute is
+ * undefined if the symbol is not a kernel. The type of this attribute is
+ * uint32_t.
+ *
+ * If the value of ::HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is true,
+ * the kernel may use more private memory than the reported value, and the
+ * application must add the dynamic call stack usage to @a
+ * private_segment_size when populating a kernel dispatch packet.
+ */
+ HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
+ /**
+ * Dynamic callstack flag. The value of this attribute is undefined if the
+ * symbol is not a kernel. The type of this attribute is bool.
+ *
+ * If this flag is set (the value is true), the kernel uses a dynamically
+ * sized call stack. This can happen if recursive calls, calls to indirect
+ * functions, or the HSAIL alloca instruction are present in the kernel.
+ */
+ HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
+ /**
+ * Call convention of the indirect function. The value of this attribute is
+ * undefined if the symbol is not an indirect function. The type of this
+ * attribute is uint32_t.
+ */
+ HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
+} hsa_code_symbol_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given code symbol.
+ *
+ * @param[in] code_symbol Code symbol.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer in which to
+ * store the value of the attribute. Must not be NULL. If the buffer is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * code symbol attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_code_symbol_get_info(
+ hsa_code_symbol_t code_symbol,
+ hsa_code_symbol_info_t attribute,
+ void *value);
+
+/**
+ * @brief Iterate over the symbols in a code object, and invoke an
+ * application-defined callback on every iteration.
+ *
+ * @param[in] code_object Code object.
+ *
+ * @param[in] callback Callback to be invoked once per code object symbol. Must
+ * not be NULL. The HSA runtime passes three arguments to the callback: the
+ * code object, a symbol, and the application data. If @p callback returns a
+ * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
+ * traversal stops and ::hsa_code_object_iterate_symbols returns that status.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * iteration. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */
+hsa_status_t HSA_API hsa_code_object_iterate_symbols(
+ hsa_code_object_t code_object,
+ hsa_status_t (*callback)(hsa_code_object_t code_object, hsa_code_symbol_t symbol, void* data),
+ void* data);
+
+/** @} */
+
+/** \defgroup executable Executable
+ * @{
+ */
+
+/**
+ * @brief Opaque handle to an executable. An executable contains ISA for
+ * finalized kernels and indirect functions, together with the allocated
+ * global/readonly segment variables they reference.
+ */
+typedef struct hsa_executable_s {
+ /**
+ * Opaque handle.
+ */
+ uint64_t handle;
+} hsa_executable_t;
+
+/**
+ * @brief Executable state.
+ */
+typedef enum {
+ /**
+ * Executable state, which allows the user to load code objects and define
+ * external variables. Variable addresses, kernel code handles, and
+ * indirect function code handles are not available in query operations until
+ * the executable is frozen (zero is always returned).
+ */
+ HSA_EXECUTABLE_STATE_UNFROZEN = 0,
+ /**
+ * Executable state, which allows the user to query variable addresses,
+ * kernel code handles, and indirect function code handles using query
+ * operations. Loading new code objects, as well as defining external
+ * variables, is not allowed in this state.
+ */
+ HSA_EXECUTABLE_STATE_FROZEN = 1
+} hsa_executable_state_t;
+
+/**
+ * @brief Create an empty executable.
+ *
+ * @param[in] profile Profile used in the executable.
+ *
+ * @param[in] executable_state Executable state. If the state is
+ * ::HSA_EXECUTABLE_STATE_FROZEN, the resulting executable is useless because no
+ * code objects can be loaded, and no variables can be defined.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @param[out] executable Memory location where the HSA runtime stores the
+ * newly created executable handle. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or
+ * @p executable is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_create(
+ hsa_profile_t profile,
+ hsa_executable_state_t executable_state,
+ const char *options,
+ hsa_executable_t *executable);
+
+/**
+ * @brief Destroy an executable.
+ *
+ * @details The executable handle becomes invalid after the executable has been
+ * destroyed. Code object handles that were loaded into this executable are
+ * still valid after the executable has been destroyed, and can be used as
+ * intended. Resources allocated outside and associated with this executable
+ * (such as external global/readonly variables) can be released after the
+ * executable has been destroyed.
+ *
+ * An executable should not be destroyed while kernels are in flight.
+ *
+ * @param[in] executable Executable.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ */
+hsa_status_t HSA_API hsa_executable_destroy(
+ hsa_executable_t executable);
+
+/**
+ * @brief Load a code object into the executable.
+ *
+ * @details Every global/readonly variable that is external must be defined
+ * using the define set of operations before loading code objects. An internal
+ * global/readonly variable is allocated once the code object that is being
+ * loaded references this variable and this variable is not allocated.
+ *
+ * Any module linkage declaration must have been defined either by a define
+ * variable or by loading a code object that has a symbol with module linkage
+ * definition.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] agent Agent to load code object for. The agent must support the
+ * default floating-point rounding mode used by @p code_object.
+ *
+ * @param[in] code_object Code object to load. The lifetime of the code object
+ * must exceed that of the executable: if @p code_object is destroyed before @p
+ * executable, the behavior is undefined.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p agent is not compatible
+ * with @p code_object (for example, @p agent does not support the default
+ * floating-point rounding mode specified by @p code_object), or @p code_object
+ * is not compatible with @p executable (for example, @p code_object and @p
+ * executable have different machine models or profiles).
+ *
+ * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
+ */
+hsa_status_t HSA_API hsa_executable_load_code_object(
+ hsa_executable_t executable,
+ hsa_agent_t agent,
+ hsa_code_object_t code_object,
+ const char *options);
+
+/**
+ * @brief Freeze the executable.
+ *
+ * @details No modifications to the executable can be made after freezing: no
+ * code objects can be loaded into the executable, and no external variables
+ * can be defined. Freezing the executable does not prevent querying the
+ * executable's attributes.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_VARIABLE_UNDEFINED One or more variables are
+ * undefined in the executable.
+ *
+ * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is already frozen.
+ */
+hsa_status_t HSA_API hsa_executable_freeze(
+ hsa_executable_t executable,
+ const char *options);
+
+/**
+ * @brief Executable attributes. Note that enumerator values start at 1, not 0.
+ */
+typedef enum {
+ /**
+ * Profile this executable is created for. The type of this attribute is
+ * ::hsa_profile_t.
+ */
+ HSA_EXECUTABLE_INFO_PROFILE = 1,
+ /**
+ * Executable state. The type of this attribute is ::hsa_executable_state_t.
+ */
+ HSA_EXECUTABLE_INFO_STATE = 2
+} hsa_executable_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given executable.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer in which to
+ * store the value of the attribute. Must not be NULL. If the buffer is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * executable attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_get_info(
+ hsa_executable_t executable,
+ hsa_executable_info_t attribute,
+ void *value);
+
+/**
+ * @brief Define an external global variable with program allocation.
+ *
+ * @details This function allows the application to provide the definition
+ * of a variable in the global segment memory with program allocation. The
+ * variable must be defined before loading a code object into an executable.
+ * In addition, code objects loaded must not define the variable.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] variable_name Name of the variable. Must not be NULL.
+ *
+ * @param[in] address Address where the variable is defined. The buffer pointed
+ * to by @p address is owned by the application, and cannot be deallocated
+ * before @p executable is destroyed.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
+ * already defined.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
+ * name @p variable_name.
+ *
+ * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
+ */
+hsa_status_t HSA_API hsa_executable_global_variable_define(
+ hsa_executable_t executable,
+ const char *variable_name,
+ void *address);
+
+/**
+ * @brief Define an external global variable with agent allocation.
+ *
+ * @details This function allows the application to provide the definition
+ * of a variable in the global segment memory with agent allocation. The
+ * variable must be defined before loading a code object into an executable.
+ * In addition, code objects loaded must not define the variable.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] agent Agent for which the variable is being defined.
+ *
+ * @param[in] variable_name Name of the variable. Must not be NULL.
+ *
+ * @param[in] address Address where the variable is defined. The buffer pointed
+ * to by @p address is owned by the application, and cannot be deallocated
+ * before @p executable is destroyed.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
+ * already defined.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
+ * name @p variable_name.
+ *
+ * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
+ */
+hsa_status_t HSA_API hsa_executable_agent_global_variable_define(
+ hsa_executable_t executable,
+ hsa_agent_t agent,
+ const char *variable_name,
+ void *address);
+
+/**
+ * @brief Define an external readonly variable.
+ *
+ * @details This function allows the application to provide the definition
+ * of a variable in the readonly segment memory. The variable must be defined
+ * before loading a code object into an executable. In addition, code objects
+ * loaded must not define the variable.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] agent Agent for which the variable is being defined.
+ *
+ * @param[in] variable_name Name of the variable. Must not be NULL.
+ *
+ * @param[in] address Address where the variable is defined. The buffer pointed
+ * to by @p address is owned by the application, and cannot be deallocated
+ * before @p executable is destroyed.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
+ * already defined.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
+ * name @p variable_name.
+ *
+ * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
+ */
+hsa_status_t HSA_API hsa_executable_readonly_variable_define(
+ hsa_executable_t executable,
+ hsa_agent_t agent,
+ const char *variable_name,
+ void *address);
+
+/**
+ * @brief Validate executable. Checks that all code objects have matching
+ * machine model, profile, and default floating-point rounding mode. Checks that
+ * all declarations have definitions. Checks declaration-definition
+ * compatibility (see HSA Programmer's Reference Manual for compatibility rules).
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[out] result Memory location where the HSA runtime stores the
+ * validation result. If the executable is valid, the result is 0.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_validate(
+ hsa_executable_t executable,
+ uint32_t* result);
+
+/**
+ * @brief Executable symbol.
+ */
+typedef struct hsa_executable_symbol_s {
+ /**
+ * Opaque handle.
+ */
+ uint64_t handle;
+} hsa_executable_symbol_t;
+
+/**
+ * @brief Get the symbol handle for a given symbol name.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] module_name Module name. Must be NULL if the symbol has
+ * program linkage.
+ *
+ * @param[in] symbol_name Symbol name.
+ *
+ * @param[in] agent Agent associated with the symbol. If the symbol is
+ * independent of any agent (for example, a variable with program
+ * allocation), this argument is ignored.
+ *
+ * @param[in] call_convention Call convention associated with the symbol. If the
+ * symbol does not correspond to an indirect function, this argument is ignored.
+ *
+ * @param[out] symbol Memory location where the HSA runtime stores the symbol
+ * handle.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
+ * that matches @p symbol_name.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
+ * @p symbol is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_get_symbol(
+ hsa_executable_t executable,
+ const char *module_name,
+ const char *symbol_name,
+ hsa_agent_t agent,
+ int32_t call_convention,
+ hsa_executable_symbol_t *symbol);
+
+/**
+ * @brief Executable symbol attributes.
+ */
+typedef enum {
+ /**
+ * The kind of the symbol. The type of this attribute is ::hsa_symbol_kind_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_TYPE = 0,
+ /**
+ * The length of the symbol name. The type of this attribute is uint32_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH = 1,
+ /**
+ * The name of the symbol. The type of this attribute is character array with
+ * the length equal to the value of ::HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH
+ * attribute.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_NAME = 2,
+ /**
+ * The length of the module name to which this symbol belongs if this symbol
+ * has module linkage, otherwise 0 is returned. The type of this attribute is
+ * uint32_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
+ /**
+ * The module name to which this symbol belongs if this symbol has module
+ * linkage, otherwise empty string is returned. The type of this attribute is
+ * character array with the length equal to the value of
+ * ::HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME = 4,
+ /**
+ * Agent associated with this symbol. If the symbol is a variable, the
+ * value of this attribute is only defined if
+ * ::HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION is
+ * ::HSA_VARIABLE_ALLOCATION_AGENT. The type of this attribute is hsa_agent_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_AGENT = 20,
+ /**
+ * The address of the variable. The value of this attribute is undefined if
+ * the symbol is not a variable. The type of this attribute is uint64_t.
+ *
+ * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
+ * is returned.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS = 21,
+ /**
+ * The linkage kind of the symbol. The type of this attribute is
+ * ::hsa_symbol_linkage_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE = 5,
+ /**
+ * Indicates whether the symbol corresponds to a definition. The type of this
+ * attribute is bool.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION = 17,
+ /**
+ * The allocation kind of the variable. The value of this attribute is
+ * undefined if the symbol is not a variable. The type of this attribute is
+ * ::hsa_variable_allocation_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
+ /**
+ * The segment kind of the variable. The value of this attribute is undefined
+ * if the symbol is not a variable. The type of this attribute is
+ * ::hsa_variable_segment_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
+ /**
+ * Alignment of the variable. The value of this attribute is undefined if
+ * the symbol is not a variable. The type of this attribute is uint32_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
+ /**
+ * Size of the variable. The value of this attribute is undefined if
+ * the symbol is not a variable. The type of this attribute is uint32_t.
+ *
+ * A value of 0 is returned if the variable is an external variable and has an
+ * unknown dimension.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE = 9,
+ /**
+ * Indicates whether the variable is constant. The value of this attribute is
+ * undefined if the symbol is not a variable. The type of this attribute is
+ * bool.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
+ /**
+ * Kernel object handle, used in the kernel dispatch packet. The value of this
+ * attribute is undefined if the symbol is not a kernel. The type of this
+ * attribute is uint64_t.
+ *
+ * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
+ * is returned.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT = 22,
+ /**
+ * Size of kernarg segment memory that is required to hold the values of the
+ * kernel arguments, in bytes. The value of this attribute is undefined if the
+ * symbol is not a kernel. The type of this attribute is uint32_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
+ /**
+ * Alignment (in bytes) of the buffer used to pass arguments to the kernel,
+ * which is the maximum of 16 and the maximum alignment of any of the kernel
+ * arguments. The value of this attribute is undefined if the symbol is not a
+ * kernel. The type of this attribute is uint32_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
+ /**
+ * Size of static group segment memory required by the kernel (per
+ * work-group), in bytes. The value of this attribute is undefined
+ * if the symbol is not a kernel. The type of this attribute is uint32_t.
+ *
+ * The reported amount does not include any dynamically allocated group
+ * segment memory that may be requested by the application when a kernel is
+ * dispatched.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
+ /**
+ * Size of static private, spill, and arg segment memory required by
+ * this kernel (per work-item), in bytes. The value of this attribute is
+ * undefined if the symbol is not a kernel. The type of this attribute is
+ * uint32_t.
+ *
+ * If the value of ::HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is
+ * true, the kernel may use more private memory than the reported value, and
+ * the application must add the dynamic call stack usage to @a
+ * private_segment_size when populating a kernel dispatch packet.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
+ /**
+ * Dynamic callstack flag. The value of this attribute is undefined if the
+ * symbol is not a kernel. The type of this attribute is bool.
+ *
+ * If this flag is set (the value is true), the kernel uses a dynamically
+ * sized call stack. This can happen if recursive calls, calls to indirect
+ * functions, or the HSAIL alloca instruction are present in the kernel.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
+ /**
+ * Indirect function object handle. The value of this attribute is undefined
+ * if the symbol is not an indirect function, or the associated agent does
+ * not support the Full Profile. The type of this attribute depends on the
+ * machine model: if machine model is small, then the type is uint32_t, if
+ * machine model is large, then the type is uint64_t.
+ *
+ * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
+ * is returned.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT = 23,
+ /**
+ * Call convention of the indirect function. The value of this attribute is
+ * undefined if the symbol is not an indirect function, or the associated
+ * agent does not support the Full Profile. The type of this attribute is
+ * uint32_t.
+ */
+ HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
+} hsa_executable_symbol_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given executable symbol.
+ *
+ * @param[in] executable_symbol Executable symbol.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * executable symbol attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_symbol_get_info(
+ hsa_executable_symbol_t executable_symbol,
+ hsa_executable_symbol_info_t attribute,
+ void *value);
+
+/**
+ * @brief Iterate over the symbols in an executable, and invoke an
+ * application-defined callback on every iteration.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] callback Callback to be invoked once per executable symbol. The
+ * HSA runtime passes three arguments to the callback: the executable, a symbol,
+ * and the application data. If @p callback returns a status other than
+ * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
+ * ::hsa_executable_iterate_symbols returns that status value.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * iteration. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_iterate_symbols(
+ hsa_executable_t executable,
+ hsa_status_t (*callback)(hsa_executable_t executable, hsa_executable_symbol_t symbol, void* data),
+ void* data);
+
+/** @} */
+
+#ifdef __cplusplus
+} // end extern "C" block
+#endif
+
+#endif // header guard
new file mode 100644
@@ -0,0 +1,527 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// Copyright 2014 ADVANCED MICRO DEVICES, INC.
+//
+// AMD is granting you permission to use this software and documentation(if any)
+// (collectively, the "Materials") pursuant to the terms and conditions of the
+// Software License Agreement included with the Materials.If you do not have a
+// copy of the Software License Agreement, contact your AMD representative for a
+// copy.
+//
+// You agree that you will not reverse engineer or decompile the Materials, in
+// whole or in part, except as allowed by applicable law.
+//
+// WARRANTY DISCLAIMER : THE SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND.AMD DISCLAIMS ALL WARRANTIES, EXPRESS, IMPLIED, OR STATUTORY,
+// INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON - INFRINGEMENT, THAT THE
+// SOFTWARE WILL RUN UNINTERRUPTED OR ERROR - FREE OR WARRANTIES ARISING FROM
+// CUSTOM OF TRADE OR COURSE OF USAGE.THE ENTIRE RISK ASSOCIATED WITH THE USE OF
+// THE SOFTWARE IS ASSUMED BY YOU.Some jurisdictions do not allow the exclusion
+// of implied warranties, so the above exclusion may not apply to You.
+//
+// LIMITATION OF LIABILITY AND INDEMNIFICATION : AMD AND ITS LICENSORS WILL NOT,
+// UNDER ANY CIRCUMSTANCES BE LIABLE TO YOU FOR ANY PUNITIVE, DIRECT,
+// INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM USE OF
+// THE SOFTWARE OR THIS AGREEMENT EVEN IF AMD AND ITS LICENSORS HAVE BEEN
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.In no event shall AMD's total
+// liability to You for all damages, losses, and causes of action (whether in
+// contract, tort (including negligence) or otherwise) exceed the amount of $100
+// USD. You agree to defend, indemnify and hold harmless AMD and its licensors,
+// and any of their directors, officers, employees, affiliates or agents from
+// and against any and all loss, damage, liability and other expenses (including
+// reasonable attorneys' fees), resulting from Your use of the Software or
+// violation of the terms and conditions of this Agreement.
+//
+// U.S.GOVERNMENT RESTRICTED RIGHTS : The Materials are provided with
+// "RESTRICTED RIGHTS." Use, duplication, or disclosure by the Government is
+// subject to the restrictions as set forth in FAR 52.227 - 14 and DFAR252.227 -
+// 7013, et seq., or its successor.Use of the Materials by the Government
+// constitutes acknowledgement of AMD's proprietary rights in them.
+//
+// EXPORT RESTRICTIONS: The Materials may be subject to export restrictions as
+// stated in the Software License Agreement.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
+#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
+
+#include "hsa.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef void* BrigModule_t;
+
+/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
+ * @{
+ */
+
+/**
+ * @brief Enumeration constants added to ::hsa_status_t by this extension.
+ */
+enum {
+ /**
+ * The HSAIL program is invalid.
+ */
+ HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
+ /**
+ * The HSAIL module is invalid.
+ */
+ HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
+ /**
+ * Machine model or profile of the HSAIL module do not match the machine model
+ * or profile of the HSAIL program.
+ */
+ HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
+ /**
+ * The HSAIL module is already a part of the HSAIL program.
+ */
+ HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
+ /**
+ * Compatibility mismatch between symbol declaration and symbol definition.
+ */
+ HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
+ /**
+ * The finalization encountered an error while finalizing a kernel or
+ * indirect function.
+ */
+ HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
+ /**
+ * Mismatch between a directive in the control directive structure and in
+ * the HSAIL kernel.
+ */
+ HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
+};
+
+/** @} */
+
+/** \defgroup ext-alt-finalizer-program Finalization Program
+ * @{
+ */
+
+/**
+ * @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
+ * the definition of the BrigModule_t type.
+ */
+typedef BrigModule_t hsa_ext_module_t;
+
+/**
+ * @brief An opaque handle to an HSAIL program, which groups a set of HSAIL
+ * modules that collectively define functions and variables used by kernels and
+ * indirect functions.
+ */
+typedef struct hsa_ext_program_s {
+ /**
+ * Opaque handle.
+ */
+ uint64_t handle;
+} hsa_ext_program_t;
+
+/**
+ * @brief Create an empty HSAIL program.
+ *
+ * @param[in] machine_model Machine model used in the HSAIL program.
+ *
+ * @param[in] profile Profile used in the HSAIL program.
+ *
+ * @param[in] default_float_rounding_mode Default float rounding mode used in
+ * the HSAIL program.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @param[out] program Memory location where the HSA runtime stores the newly
+ * created HSAIL program handle.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
+ * @p profile is invalid, @p default_float_rounding_mode is invalid, or
+ * @p program is NULL.
+ */
+hsa_status_t HSA_API hsa_ext_program_create(
+ hsa_machine_model_t machine_model,
+ hsa_profile_t profile,
+ hsa_default_float_rounding_mode_t default_float_rounding_mode,
+ const char *options,
+ hsa_ext_program_t *program);
+
+/**
+ * @brief Destroy an HSAIL program.
+ *
+ * @details The HSAIL program handle becomes invalid after it has been
+ * destroyed. Code object handles produced by ::hsa_ext_program_finalize are
+ * still valid after the HSAIL program has been destroyed, and can be used as
+ * intended. Resources allocated outside and associated with the HSAIL program
+ * (such as HSAIL modules that are added to the HSAIL program) can be released
+ * after the HSAIL program has been destroyed.
+ *
+ * @param[in] program HSAIL program.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
+ * invalid.
+ */
+hsa_status_t HSA_API hsa_ext_program_destroy(
+ hsa_ext_program_t program);
+
+/**
+ * @brief Add an HSAIL module to an existing HSAIL program.
+ *
+ * @details The HSA runtime does not perform a deep copy of the HSAIL module
+ * upon addition. Instead, it stores a pointer to the HSAIL module. The
+ * ownership of the HSAIL module belongs to the application, which must ensure
+ * that @p module is not released before destroying the HSAIL program.
+ *
+ * The HSAIL module is successfully added to the HSAIL program if @p module is
+ * valid, if all the declarations and definitions for the same symbol are
+ * compatible, and if @p module specifies a machine model and profile that
+ * match those of the HSAIL program.
+ *
+ * @param[in] program HSAIL program.
+ *
+ * @param[in] module HSAIL module. The application can add the same HSAIL module
+ * to @p program at most once. The HSAIL module must specify the same machine
+ * model and profile as @p program. If the floating-point rounding mode of @p
+ * module is not default, then it should match that of @p program.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
+ * module does not match machine model of @p program, or the profile of @p
+ * module does not match profile of @p program.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
+ * already a part of the HSAIL program.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
+ * definition compatibility mismatch. See the symbol compatibility rules in the
+ * HSA Programmer's Reference Manual.
+ */
+hsa_status_t HSA_API hsa_ext_program_add_module(
+ hsa_ext_program_t program,
+ hsa_ext_module_t module);
+
+/**
+ * @brief Iterate over the HSAIL modules in a program, and invoke an
+ * application-defined callback on every iteration.
+ *
+ * @param[in] program HSAIL program.
+ *
+ * @param[in] callback Callback to be invoked once per HSAIL module in the
+ * program. The HSA runtime passes three arguments to the callback: the program,
+ * an HSAIL module, and the application data. If @p callback returns a status
+ * other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
+ * stops and ::hsa_ext_program_iterate_modules returns that status value.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * iteration. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */
+hsa_status_t HSA_API hsa_ext_program_iterate_modules(
+ hsa_ext_program_t program,
+ hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
+ void* data),
+ void* data);
+
+/**
+ * @brief HSAIL program attributes.
+ */
+typedef enum {
+ /**
+ * Machine model specified when the HSAIL program was created. The type
+ * of this attribute is ::hsa_machine_model_t.
+ */
+ HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
+ /**
+ * Profile specified when the HSAIL program was created. The type of
+ * this attribute is ::hsa_profile_t.
+ */
+ HSA_EXT_PROGRAM_INFO_PROFILE = 1,
+ /**
+ * Default float rounding mode specified when the HSAIL program was
+ * created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
+ */
+ HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
+} hsa_ext_program_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given HSAIL program.
+ *
+ * @param[in] program HSAIL program.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * HSAIL program attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_ext_program_get_info(
+ hsa_ext_program_t program,
+ hsa_ext_program_info_t attribute,
+ void *value);
+
+/**
+ * @brief Finalizer-determined call convention.
+ */
+typedef enum {
+ /**
+ * Finalizer-determined call convention.
+ */
+ HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
+} hsa_ext_finalizer_call_convention_t;
+
+/**
+ * @brief Control directives specify low-level information about the
+ * finalization process.
+ */
+typedef struct hsa_ext_control_directives_s {
+ /**
+ * Bitset indicating which control directives are enabled. The bit assigned to
+ * a control directive is determined by the corresponding value in
+ * BrigControlDirective.
+ *
+ * If a control directive is disabled, its corresponding field value (if any)
+ * must be 0. Control directives that are only present or absent (such as
+ * partial workgroups) have no corresponding field as the presence of the bit
+ * in this mask is sufficient.
+ */
+ uint64_t control_directives_mask;
+ /**
+ * Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
+ * assigned to an HSAIL exception is determined by the corresponding value
+ * in BrigExceptionsMask. If the kernel contains an enablebreakexceptions
+ * control directive, the finalizer uses the union of the two masks.
+ */
+ uint16_t break_exceptions_mask;
+ /**
+ * Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
+ * bit assigned to an HSAIL exception is determined by the corresponding value
+ * in BrigExceptionsMask. If the kernel contains an enabledetectexceptions
+ * control directive, the finalizer uses the union of the two masks.
+ */
+ uint16_t detect_exceptions_mask;
+ /**
+ * Maximum size (in bytes) of dynamic group memory that will be allocated by
+ * the application for any dispatch of the kernel. If the kernel contains a
+ * maxdynamicsize control directive, the two values should match.
+ */
+ uint32_t max_dynamic_group_size;
+ /**
+ * Maximum number of grid work-items that will be used by the application to
+ * launch the kernel. If the kernel contains a maxflatgridsize control
+ * directive, the value of @a max_flat_grid_size must not be greater than the
+ * value of the directive, and takes precedence.
+ *
+ * The value specified for maximum absolute grid size must be greater than or
+ * equal to the product of the values specified by @a required_grid_size.
+ *
+ * If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
+ * control_directives_mask, this field must be greater than 0.
+ */
+ uint64_t max_flat_grid_size;
+ /**
+ * Maximum number of work-group work-items that will be used by the
+ * application to launch the kernel. If the kernel contains a
+ * maxflatworkgroupsize control directive, the value of @a
+ * max_flat_workgroup_size must not be greater than the value of the
+ * directive, and takes precedence.
+ *
+ * The value specified for maximum absolute work-group size must be greater than
+ * or equal to the product of the values specified by @a required_workgroup_size.
+ *
+ * If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
+ * control_directives_mask, this field must be greater than 0.
+ */
+ uint32_t max_flat_workgroup_size;
+ /**
+ * Reserved. Must be 0.
+ */
+ uint32_t reserved1;
+ /**
+ * Grid size that will be used by the application in any dispatch of the
+ * kernel. If the kernel contains a requiredgridsize control directive, the
+ * dimensions should match.
+ *
+ * The specified grid size must be consistent with @a required_workgroup_size
+ * and @a required_dim. Also, the product of the three dimensions must not
+ * exceed @a max_flat_grid_size. Note that the listed invariants must hold
+ * only if all the corresponding control directives are enabled.
+ *
+ * If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
+ * control_directives_mask, the three dimension values must be greater than 0.
+ */
+ uint64_t required_grid_size[3];
+ /**
+ * Work-group size that will be used by the application in any dispatch of the
+ * kernel. If the kernel contains a requiredworkgroupsize control directive,
+ * the dimensions should match.
+ *
+ * The specified work-group size must be consistent with @a required_grid_size
+ * and @a required_dim. Also, the product of the three dimensions must not
+ * exceed @a max_flat_workgroup_size. Note that the listed invariants must
+ * hold only if all the corresponding control directives are enabled.
+ *
+ * If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
+ * control_directives_mask, the three dimension values must be greater than 0.
+ */
+ hsa_dim3_t required_workgroup_size;
+ /**
+ * Number of dimensions that will be used by the application to launch the
+ * kernel. If the kernel contains a requireddim control directive, the two
+ * values should match.
+ *
+ * The specified dimensions must be consistent with @a required_grid_size and
+ * @a required_workgroup_size. This invariant must hold only if all the
+ * corresponding control directives are enabled.
+ *
+ * If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
+ * control_directives_mask, this field must be 1, 2, or 3.
+ */
+ uint8_t required_dim;
+ /**
+ * Reserved. Must be 0.
+ */
+ uint8_t reserved2[75];
+} hsa_ext_control_directives_t;
+
+/**
+ * @brief Finalize an HSAIL program for a given instruction set architecture.
+ *
+ * @details Finalize all of the kernels and indirect functions that belong to
+ * the same HSAIL program for a specific instruction set architecture (ISA). The
+ * transitive closure of all functions specified by call or scall must be
+ * defined. Kernels and indirect functions that are being finalized must be
+ * defined. Kernels and indirect functions that are referenced in kernels and
+ * indirect functions being finalized may or may not be defined, but must be
+ * declared. All the global/readonly segment variables that are referenced in
+ * kernels and indirect functions being finalized may or may not be defined, but
+ * must be declared.
+ *
+ * @param[in] program HSAIL program.
+ *
+ * @param[in] isa Instruction set architecture to finalize for.
+ *
+ * @param[in] call_convention A call convention used in a finalization. Must
+ * have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
+ * and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
+ * isa (not inclusive).
+ *
+ * @param[in] control_directives Low-level control directives that influence
+ * the finalization process.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @param[in] code_object_type Type of code object to produce.
+ *
+ * @param[out] code_object Code object generated by the Finalizer, which
+ * contains the machine code for the kernels and indirect functions in the HSAIL
+ * program. The code object is independent of the HSAIL module that was used to
+ * generate it.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
+ * invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH A directive in the control
+ * directive structure does not match the one in the HSAIL kernel, or the same
+ * directive is used with a different value in one of the functions used by
+ * this kernel.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
+ * encountered an error while compiling a kernel or an indirect function.
+ */
+hsa_status_t HSA_API hsa_ext_program_finalize(
+ hsa_ext_program_t program,
+ hsa_isa_t isa,
+ int32_t call_convention,
+ hsa_ext_control_directives_t control_directives,
+ const char *options,
+ hsa_code_object_type_t code_object_type,
+ hsa_code_object_t *code_object);
+
+/** @} */
+
+#define hsa_ext_finalizer_1_00
+
+typedef struct hsa_ext_finalizer_1_00_pfn_s {
+ hsa_status_t (*hsa_ext_program_create)(
+ hsa_machine_model_t machine_model, hsa_profile_t profile,
+ hsa_default_float_rounding_mode_t default_float_rounding_mode,
+ const char *options, hsa_ext_program_t *program);
+
+ hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
+
+ hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
+ hsa_ext_module_t module);
+
+ hsa_status_t (*hsa_ext_program_iterate_modules)(
+ hsa_ext_program_t program,
+ hsa_status_t (*callback)(hsa_ext_program_t program,
+ hsa_ext_module_t module, void *data),
+ void *data);
+
+ hsa_status_t (*hsa_ext_program_get_info)(
+ hsa_ext_program_t program, hsa_ext_program_info_t attribute,
+ void *value);
+
+ hsa_status_t (*hsa_ext_program_finalize)(
+ hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
+ hsa_ext_control_directives_t control_directives, const char *options,
+ hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
+} hsa_ext_finalizer_1_00_pfn_t;
+
+#ifdef __cplusplus
+} // extern "C" block
+#endif // __cplusplus
+
+#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
deleted file mode 100644
@@ -1,177 +0,0 @@
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <dlfcn.h>
-#include "okra.h"
-
-typedef okra_status_t (*okra_get_context_func_t)(okra_context_t**);
-typedef okra_status_t (*okra_kernel_create_from_binary_func_t)(okra_context_t *, const char *, size_t , const char *, okra_kernel_t **);
-typedef okra_status_t (*okra_push_pointer_func_t)(okra_kernel_t* , void* );
-typedef okra_status_t (*okra_execute_kernel_func_t)(okra_context_t*, okra_kernel_t* , okra_range_t* );
-typedef okra_status_t (*okra_clear_args_func_t)(okra_kernel_t* );
-typedef okra_status_t (*okra_dispose_kernel_func_t)(okra_kernel_t*);
-
-static void *okralib;
-static okra_get_context_func_t _okra_get_context;
-static okra_kernel_create_from_binary_func_t _okra_kernel_create_from_binary;
-static okra_push_pointer_func_t _okra_push_pointer;
-static okra_execute_kernel_func_t _okra_execute_kernel;
-static okra_clear_args_func_t _okra_clear_args;
-
-/* Returns false on error. */
-static bool
-loadokra (void)
-{
- if (okralib)
- return okralib != (void*) -1;
- okralib = dlopen("libokra_x86_64.so", RTLD_LAZY);
- if (!okralib)
- {
- okralib = (void*) -1;
- fprintf(stderr, "Unable to load libokra_x86_64.so\n");
- return false;
- }
- _okra_get_context = (okra_get_context_func_t) dlsym(okralib, "okra_get_context");
- _okra_kernel_create_from_binary = (okra_kernel_create_from_binary_func_t)dlsym(okralib, "okra_create_kernel_from_binary");
- _okra_push_pointer = (okra_push_pointer_func_t) dlsym(okralib, "okra_push_pointer");
- _okra_execute_kernel = (okra_execute_kernel_func_t) dlsym(okralib, "okra_execute_kernel");
- _okra_clear_args = (okra_clear_args_func_t) dlsym(okralib, "okra_clear_args");
-
- if (!_okra_get_context
- || !_okra_kernel_create_from_binary
- || !_okra_push_pointer
- || !_okra_execute_kernel
- || !_okra_clear_args)
- {
- dlclose (okralib);
- okralib = (void*) -1;
- fprintf (stderr, "Cannot find OKRA symbols\n");
- return false;
- }
-
- return true;
-}
-
-typedef union __hsa_kernelarg_
-{
- void* addr; ///< pointer to a buffer
- int32_t s32value; ///< signed 32 bit value
- uint32_t u32value; ///< unsigned 32 bit value
- float fvalue; ///< float value
- double dvalue; ///< double value
- int64_t s64value; ///< signed 64 bit value
- uint64_t u64value; ///< unsigned 64 bit value
-} __hsa_kernelarg;
-
-typedef struct __hsa_launch_attrs_
-{
- uint64_t flags;
- uint64_t grid[3];
- uint64_t group[3];
-} __hsa_launch_attrs;
-
-typedef struct __hsa_kernel_desc_
-{
- const char *filename;
- const char *name;
- uint64_t nargs;
- okra_context_t *kernel;
- okra_context_t *context;
-} __hsa_kernel_desc;
-
-typedef okra_range_t __hsa_launch_range;
-
-void * __hsa_launch_kernel (__hsa_kernel_desc *, __hsa_launch_range *,
- __hsa_kernelarg *);
-
-void *
-__hsa_launch_kernel (__hsa_kernel_desc * _kd, __hsa_launch_range *range_p,
- __hsa_kernelarg *args)
-{
- okra_status_t status;
- okra_context_t *context;
- okra_kernel_t *kernel;
- okra_range_t range;
- unsigned int i = 0;
- static int debug = 0;
-
- if (!loadokra ())
- return NULL;
-
- if (!debug)
- {
- if (getenv ("HSA_DEBUG"))
- debug = 1;
- else
- debug = -1;
- }
-
- if (_kd->context)
- context = _kd->context ;
- else {
- status = _okra_get_context(&context);
- if (status != OKRA_SUCCESS)
- {
- fprintf (stderr, "Unable to create context\n");
- return NULL;
- }
- _kd->context = context ;
- }
- if (_kd->kernel)
- {
- kernel = _kd->kernel;
- }
- else
- {
- size_t size = 1;
- const char* pfile;
- const char* fileName = _kd->filename;
- if (_kd->filename[0] == 0)
- fileName = "hsakernel.o";
- pfile = (const char *) fopen (fileName, "rb");
- if (!pfile)
- {
- fprintf (stderr, "Unable to open file %s\n", fileName);
- return NULL;
- }
- status = _okra_kernel_create_from_binary(context, pfile, size, _kd->name, &kernel);
- fclose((FILE *)pfile);
- if (status != OKRA_SUCCESS)
- {
- fprintf (stderr, "Unable to create Kernel\n");
- return NULL;
- }
- _kd->kernel = kernel;
- }
-
- _okra_clear_args (kernel);
- for (; i < _kd->nargs; i++) {
- void *cur_args = args[i].addr;
- _okra_push_pointer (kernel, cur_args);
- }
- /* set launch dimensions */
- range.dimension = 1;
- range.global_size[0] = 256;
- range.group_size[0] = 16;
- if (!range_p)
- {
- range.dimension = 1;
- range.global_size[0] = 256;
- range.group_size[0] = 16;
- range_p = &range;
- }
- if (debug > 0)
- {
- fprintf (stderr, "HSA: launching kernel %s\n", _kd->name);
- fprintf (stderr, "dim: %u, s0: %u, g0: %u, r: %u\n", range_p->dimension,
- range_p->global_size[0], range_p->group_size[0],
- range_p->reserved);
- }
- status = _okra_execute_kernel (context, kernel, range_p);
- if (status != OKRA_SUCCESS)
- {
- fprintf (stderr, "Failed to launch kernel\n");
- return NULL;
- }
- return kernel;
-}
@@ -236,4 +236,5 @@ GOMP_4.0.1 {
HSA_1.0 {
global:
__hsa_launch_kernel;
+ __hsa_register_image;
} GOMP_4.0.1;
deleted file mode 100644
@@ -1,147 +0,0 @@
-/*
-OKRA Runtime C interface
-*/
-
-#ifndef OKRA_H
-#define OKRA_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-#if defined (_WIN32)
- #ifndef __EXPORT__
- #define OKRA_API __declspec(dllimport)
- #else
- #define OKRA_API __declspec(dllexport)
- #endif
-#else
- #define OKRA_API
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-//opaque okra context
-typedef uint64_t okra_context_t;
-
-//opaque okra kernel
-typedef uint64_t okra_kernel_t;
-
-//launch attributes that defines execution range
-typedef struct okra_range_s
-{
- uint32_t dimension; //max value is 3
- uint32_t global_size[3];
- uint32_t group_size[3];
- uint32_t reserved; //For future use
-} okra_range_t;
-
-
-//This is the list of errors that okra supports
-//@Note: Will add more error codes as needed
-typedef enum okra_status_t {
- OKRA_SUCCESS=0,
- OKRA_CONTEXT_NO_DEVICE_FOUND,
- OKRA_CONTEXT_QUEUE_CREATION_FAILED,
- OKRA_SET_MEMORY_POLICY_FAILED,
- OKRA_MEMORY_REGISTRATION_FAILED,
- OKRA_MEMORY_DEREGISTRATION_FAILED,
- OKRA_CONTEXT_CREATE_FAILED,
- OKRA_CONTEXT_ALREADY_EXIST,
- OKRA_KERNEL_HSAIL_ASSEMBLING_FAILED,
- OKRA_KERNEL_FINALIZE_FAILED,
- OKRA_KERNEL_CREATE_FAILED,
- OKRA_KERNEL_ELF_INITIALIZATION_FAILED,
- OKRA_KERNEL_INVALID_ELF_CONTAINER,
- OKRA_KERNEL_INVALID_SECTION_HEADER,
- OKRA_KERNEL_MISSING_STRING_SECTION,
- OKRA_KERNEL_MISSING_DIRECTIVE_SECTION,
- OKRA_KERNEL_MISSING_CODE_SECTION,
- OKRA_KERNEL_MISSING_OPERANDS_SECTION,
- OKRA_KERNEL_MISSING_DEBUG_SECTION,
- OKRA_LOAD_BRIG_FAILED,
- OKRA_UNLOAD_BRIG_FAILED,
- OKRA_KERNEL_CREATE_FROM_BINARY_FAILED,
- OKRA_KERNEL_PUSH_KERNARG_FAILED,
- OKRA_KERNEL_CLEARARG_FAILED,
- OKRA_RANGE_INVALID_DIMENSION,
- OKRA_RANGE_INVALID_GLOBAL_SIZE,
- OKRA_RANGE_INVALID_GROUP_SIZE,
- OKRA_EXECUTE_FAILED,
- OKRA_DISPOSE_FAILED,
- OKRA_INVALID_ARGUMENT,
- OKRA_UNKNOWN
-}okra_status_t;
-
-//Get a okra context - does device detection, command queue creation internally
-//Note context is singleton at the moment - may change later if requirement
-//changes
-//This means you have one context, device and queue per process, but sufficient
-//for most common cases
-okra_status_t OKRA_API okra_get_context(okra_context_t** context);
-
-//create kernel that can be dispatched - takes in hsail text as input and creates
-// a kernel - does HSAIL assembling and finalize
-okra_status_t OKRA_API okra_create_kernel(okra_context_t* context,
- const char *hsail_source, const char *entryName,
- okra_kernel_t **kernel);
-
-//create kernel that can be dispatched - takes in binary as input and creates a
-//kernel
-okra_status_t OKRA_API okra_create_kernel_from_binary(okra_context_t *context,
- const char *binary, size_t size, const char *entryName,
- okra_kernel_t **kernel);
-
-//Following are set of apis to push kernel args to the kernel
-//for pointers and objects
-okra_status_t OKRA_API okra_push_pointer(okra_kernel_t* kernel,
- void* address);
-
-//unsigned char is equivalent for jboolean-use this for passing boolean
-//if using from java world
-okra_status_t OKRA_API okra_push_boolean(okra_kernel_t* kernel,
- unsigned char value);
-
-//char is equivalent for jbyte-use this for passing byte if using
-//from java world
-okra_status_t OKRA_API okra_push_byte(okra_kernel_t* kernel,
- char value);
-
-//use this if passing jdouble from java world
-okra_status_t OKRA_API okra_push_double(okra_kernel_t* kernel,
- double value);
-
-//use this if passing jfloat from java world
-okra_status_t OKRA_API okra_push_float(okra_kernel_t* kernel,
- float value);
-
-//use this if passing jint from java world
-okra_status_t OKRA_API okra_push_int(okra_kernel_t* kernel,
- int value);
-
-//use this if passing jlong from java world
-okra_status_t OKRA_API okra_push_long(okra_kernel_t* kernel,
- long value);
-
-// Call clearargs between executions of a kernel before setting the new args
-okra_status_t OKRA_API okra_clear_args(okra_kernel_t* kernel);
-//end of kernel arg related APIs
-
-//execute the kernel - takes kernel, execution range as input
-//This is a synchronous call - returns only after kernel completion
-//If the user pass 0's for group size, the runtime will choose one
-okra_status_t OKRA_API okra_execute_kernel(okra_context_t* context, okra_kernel_t* kernel, okra_range_t* range);
-
-//cleanup kernel
-okra_status_t OKRA_API okra_dispose_kernel(okra_kernel_t* kernel);
-
-//cleanup any resource allocated by okra context
-okra_status_t OKRA_API okra_dispose_context(okra_context_t* context);
-
-
-#ifdef __cplusplus
-} // end of extern "C"
-#endif
-
-#endif //OKRA_H