@@ -1,288 +1,154 @@
-Please note that in this particular revision, this README file is
-outdated, a new version reacting to all incompatible changes will be
-committed at the end of a batch. I suggest you only use this revision
-if you really have a reason to do it, otherwise check out a more
-recent one.
-
-In particular, in this revision it is impossible to directly use the
-simulator any more, only the OKRA interface is supported (but you can
-still use the simulator through it). Even the rest of the
-instructions are not quite up to date. You need to use LIBHSAIL
-supporting HSAIL 1.0p and only extract different sections from the
-object files.
-
-===========================================================================
-
-For playing with the HSA branch there are two viable approaches:
-1) Use the HSA simulator directly (via a shared library)
-2) Use the OKRA wrapper, which comes in two flavors itself,
- supporting the simulator and supporting hardware
-
-Using the OKRA layer results in a small restriction (see below), but
-is currently the only way to see the HSA code running on hardware.
-For debugging it makes sense to have the simulator available anyway,
-and as it's a bit cumbersome to set up, we'll deal with that first.
-If you only want to play with hardware skip below until OKRA.
-
-======================= GCC + HSA Simulator ===============================
-
-The important things for the simulator
-are split over two git repositories, but for building they
-are integrated. They'll also need a specific LLVM version.
-You don't need to hunt those for yourself, downloading it is
-integrated into the build process if you set the right symlinks
-(HSAIL-Tools needs to be under HSAIL-Instruction-Set-Simulator,
-see below).
-
-Make sure you have libdwarf and libelf development libraries and
-headers installed, then git clone the two repositories:
-
-% mkdir hsa; cd hsa
-% git clone https://github.com/HSAFoundation/HSAIL-Instruction-Set-Simulator.git% git clone https://github.com/HSAFoundation/HSAIL-Tools
-% cd HSAIL-Instruction-Set-Simulator/src/
-% ln -sf ../../HSAIL-Tools .
-% cd ../
-% mkdir dev; cd dev
-% cmake -DCMAKE_BUILD_TYPE=Debug ..
-
-This will also checkout LLVM in the correct version as necessary for
-the simulator. Build the HSA simulator (which also builds libHSAIL):
-
-% cd HSAIL-Instruction-Set-Simulator/dev
-% make _DBG=1
-
-If something breaks here or in the cmake call before it's probably
-missing development packages, which you'll have to figure out and
-install somehow.
-
-Test the HSAIL simulator:
-
-% ./fcos
-fcos(-3.141593e+00) = -1.000000e+00
-fcos(-1.570796e+00) = -4.371139e-08
-fcos(0.000000e+00) = 1.000000e+00
-fcos(1.570796e+00) = -4.371139e-08
-fcos(3.141593e+00) = -1.000000e+00
-
-Now you'll want to apply two patches, hsail-iss1.diff and
-hsail-tools1.diff, the former adds the creation of libgcchsa.so,
-the latter enables libHSAIL to not check the ELF machine type
-of kernel containers (so it can load normal i386 files), so that
-hsailasm can disassemble our .o files. Both
-patches are included in the gcc tree parallel to this README.hsa.
-
-% cd HSAIL-Instruction-Set-Simulator; patch -p1 < hsail-iss1.diff
-% cd ../HSAIL-Tools; patch -p1 < hsail-tools1.diff
-
-Remake and check if everything still builds. In particular you now
-should have a library in dev/build/Debug/lib/libgcchsa.so (or Release
-instead of Debug).
-
-Configure and build the GCC hsa branch without bootstrapping and for C/C++
-only (configure --disable-bootstrap --enable-languages=c,c++).
-
-Try compiling a HSA function:
-
-% cat hsakernel.c
-extern void square (int *ip, int *rp) __attribute__((hsa, noinline));
-void __attribute__((hsa, noinline)) square (int *ip, int *rp)
-{
- int i = *ip;
- *rp = i * i;
+ General information
+
+The hsa branch is a development and experimental branch which can
+produce HSAIL code for HSA capable accelerators. Its contents can be
+roughly divided into two parts. First, the files with the hsa prefix
+contain expansion of specially marked gimple functions to HSAIL and
+while they still need a lot of work they are meant as the basis for
+future inclusion in trunk. Changes in the rest of the gcc,
+particularly in omp-low.c are of a much more experimental nature and
+we are aware that many of them will need to be re-worked.
+
+In particular, the branch currently attempts to compile all OMP
+parallel regions as HSA kernels and strives to strip as much control
+flow from simple single OMP parallel loops. This makes experimenting
+with current OMP tests and benchmarks easier but of course it is not
+how thing s are eventually expected to work and we are aware that what
+will remain will have to be implemented differently. In particular,
+integration with gomp4 branch development is planned as well.
+
+======================================================================
+
+ Setup
+
+HSA branch now produces HSAIL version 1.0p which isn't currently
+supported by the only simulator I am aware of. Until it is upgraded
+you will need real HSA capable hardware to execute code generated by
+this branch.
+
+In order to run the code produced by the HSA branch on such hardware,
+you'll need the following (assuming you run on x86_64-linux, so far we
+have not tested anything else):
+
+1. Kernel driver. This step is out of scope of this document and
+ depends heavily on your environment, especially on what Linux
+ distribution you are running.
+
+2. libhsakmt.so.1 from HSA-Drivers-Linux-AMD repository. This is the
+ same repository where you might have your kernel from. If not,
+ check it out from github:
+
+ $ mkdir $HOME/testhsa
+ $ cd $HOME/testhsa
+ $ git clone 'https://github.com/HSAFoundation/HSA-Drivers-Linux-AMD'
+
+ Make sure the path to the shared object is in your
+ LD_LIBRARY_PATH. If you use bash, in the example above you would
+ issue:
+
+ $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/testhsa/HSA-Drivers-Linux-AMD/kfd-0.8/libhsakmt/lnx64a
+
+3. libhsa-runtime64.so from HSA-Runtime-AMD repository, which you can
+ clone from github too. Also make sure the shared object is in your
+ LD_LIBRARY_PATH:
+
+ $ git clone 'https://github.com/HSAFoundation/HSA-Runtime-AMD'
+ $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/testhsa/HSA-Runtime-AMD/lib/x86_64
+
+4. OKRA (Offloadable Kernel Runtime API) interface. Get it from
+ github and put into your LD_LIBRARY_PATH as well:
+
+ $ git clone 'https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device'
+ $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/testhsa/Okra-Interface-to-HSA-Device/okra/dist/bin
+
+ You can now also try the sample check shipped with OKRA to test
+ whether your setup works. Go into directory
+ Okra-Interface-to-HSA-Device/okra/samples, edit the file env.sh as
+ indicated by the comments there and then run scripts build.sh and
+ subsequently runSquares.sh. The test should print "PASSED" before
+ terminating.
+
+OK, now you are ready to compile and run your first example.
+Configure and compile the hsa branch gcc as usual and install it into
+some directory. Mine is in $HOME/gcc/hsa/inst/ and I will be using
+that in the following example. Lets assume you have the following
+source code in file omp_veccopy.c:
+
+----------------------------------------
+#include <omp.h>
+#include <stdio.h>
+#include <string.h>
+#define N 256
+void printList(int *src, int *dst, int size) {
+ int idx = 0;
+ for (; idx < size; idx++) {
+ printf ("Src[%d] : %d Dst[%d] : %d \n", idx, src[idx], idx, dst[idx]);
+ }
}
-% ./gcc/xgcc -B./gcc/ -c hsakernel.c
-
-You should have a hsakernel.o file that contains a BRIG kernel '&square'.
-To verify, disassemble that one. There's a caveat: the disassembler
-only knows how to load ELF32 files, so if the above produced an ELF64
-file, you first have to copy around the BRIG sections to a new ELF32 binary:
-
-% objcopy -O elf32-i386 -j .brig_strtab -j .brig_directives -j .brig_code -j .brig_operands -j .brig_debug hsakernel.o disasmme.o
-
-Note that this is only necessary for the disassembler. The simulator
-itself is able to deal with ELF64 files just fine. Now we can disassemble it:
-
-% ../HSAIL-Instruction-Set-Simulator/dev/HSAIL-Tools/hsailasm -disassemble disasmme.o
-% less disasmme.hsail
-
-Now build a program containing a call to that kernel:
-% cat hsacall.c
-typedef __SIZE_TYPE__ size_t;
-extern void *malloc (size_t);
-extern int printf (const char *, ...);
-
-extern void square (int *ip, int *rp) __attribute__((hsa, noinline));
-
-int main ()
-{
- int i;
- int *bla = malloc(2*sizeof(bla[0]));
- printf ("Square sequence: ");
- for (i = 1; i < 20; i++)
- {
- int r;
- bla[0] = i;
- //square (&i, &r);
- square (&bla[0], &bla[1]);
- r = bla[1];
- printf ("%d ", r);
- }
- printf ("\n");
+
+void veccpy(int *a, int *b, int n) {
+
+ int j;
+ #pragma omp parallel for
+ for( j = 0; j<n;j++) {
+ b[j] = a[j];
+ }
+}
+
+int main() {
+ int a[N], b[N], i;
+
+ for (i = 0; i< N; i++) {
+ a[i] = i;
+ }
+
+ veccpy(a, b, N);
+
+ if (memcmp(a, b, N * sizeof (int)) != 0) {
+ printList((int *)a, (int *)b, N);
+ printf ("Vector Copy C case - Failed \n");
+ } else {
+ printf ("Vector Copy C case - Passed \n");
+ }
return 0;
}
+----------------------------------------
+
+Compile it with the branch:
+
+ $ $HOME/gcc/hsa/inst/bin/gcc -lm -fopenmp -O -c omp_veccopy.c
+
+If you also provide the -fdump-tree-ompexp-details option to the
+compiler, it will create a file with .ompexp suffix which you can
+search for optimization notes indicating whether the compiler has
+succeeded in turning OMP loops into kernels stripped off all
+OMP-generated control flow and suitable for a GPGPU. If it for some
+reason failed, the note will also give you the reason why. In this
+example, however, it reports success like this:
+
+omp_veccopy.c:15:12: note: Parallel construct will be turned into an HSA kernel
+
+The next step is to extract the HSA ELF sections to a 32-bit file
+called hsakernel.o. OKRA currently handles 32-bit ELF files only.
+This step is currently necessary but there is ongoing work aimed at
+eliminating it (yes, it means that currently there is no way of
+combining HSA from different compilation units):
+
+ $ objcopy -O elf32-i386 -j hsa_data -j hsa_code -j hsa_operand omp_veccopy.o hsakernel.o
+
+Now link the program as usual, providing a path to libgomp of the
+installed hsa branch gcc:
+
+ $ $HOME/gcc/hsa/inst/bin/gcc omp_veccopy.o -lm -fopenmp -Wl,-rpath,$HOME/gcc/hsa/inst/lib64 -o omp_veccopy
+
+Assuming you have all necessary libraries in your LD_LIBRARY_PATH, you
+can now run the example:
+
+ $ ./omp_veccopy
-% ./gcc/xgcc -B./gcc/ -c hsacall.c
-% gcc -o hsacall hsacall.o hsakernel.o ../HSAIL-Instruction-Set-Simulator/dev/build/Debug/lib/libgcchsa.so
-
-Now we should be able to call it:
-
-% LD_LIBRARY_PATH=../HSAIL-Instruction-Set-Simulator/dev/build/Debug/lib/ \
- ./hsacall
-Square sequence: 1 4 9 16 25 36 49 64 81 100 121 144 169 196 225 256 289 324 361
-
-You can debug with gdb (try breakpointing on __hsa_launch_kernel).
-
-Now experiment. You'll find many missing things :-)
-
-OpenMP
--------------------------
-
-The HSA branch contains some early support to transform openMP code
-into HSAIL code, for now only the openMP parallel pragma is implemented,
-e.g. this loop:
-
- /* Launch threads to */
- #pragma omp parallel for shared(z, target_lat, target_long) private(i, tmp_lat, tmp_long)
- for( i = 0 ; i < rec_count ; i++ ) {
- tmp_lat = floatbuf[2*i];
- tmp_long = floatbuf[2*i+1];
- z[i] = sqrt(( (tmp_lat-target_lat) * (tmp_lat-target_lat) )+( (tmp_long-target_long) * (tmp_long-target_long) ));
- } /* omp end parallel */
-
-can be transformed into an HSA kernel. Just use -fopenmp for compilation.
-
-========================= GCC + OKRA bindings ==============================
-
-If you want hardware support you need to use the okra bindings towards
-the HSA backend. There are two: one using the simulator (sources available),
-and another using hardware (binary only). There may be more in the future.
-Right now both are a shared module named libokra_x86_64.so, which is loaded
-dynamically by libgomp. Depending which one is loaded you'll get the
-simluator or the hardware one. If you link your programs additionally
-against the above libgcchsa.so then the HSA simulator is used directly
-and the OKRA binding is ignored. (Technically that's done by GCC emitting
-calls to __hsa_launch_kernel, which exists in libgcchsa.so and in libgomp.so.
-If libgcchsa.so is linked explicitely it's linked before libgomp.so,
-so that version is found first. Otherwise the libgomp.so variant is used,
-which uses the okra wrapper itself.)
-
-The okra wrapper for the simulator is here:
- https://github.com/HSAFoundation/Okra-Interface-to-HSAIL-Simulator
-The okra wrapper for hardware here:
- https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device
-
-========================== okra for simulator ==========================
-
-For the simulator-okra you need the simulator itself first, see above
-for building it. Once done you can build the simulator-okra. The repo
-itself contains only a method which needs java and ant to build this, it's
-easier to use this Makefile (to be placed into the toplevel dir, parallel
-to build-okra-sim.xml):
-
------------------ Makefile ----------------------------
-HSADIR=..../HSAIL-Instruction-Set-Simulator
-HSALIBFLAVOR=Debug
-HSADEVDIR=$(HSADIR)/dev
-LIBS=-L $(HSADEVDIR)/build/$(HSALIBFLAVOR)/lib/ -Wl,-rpath,$(HSADEVDIR)/build/$(HSALIBFLAVOR)/lib/ -lgcchsa
-INCLUDES=-Isrc/cpp -I$(HSADIR)/include
-SOURCES=src/cpp/okra_c_interface.cpp src/cpp/okraContextSimulator.cpp
-OBJECTS=$(SOURCES:.cpp=.o)
-CXXFLAGS=-g -fPIC $(INCLUDES)
-
-all: dist/bin/libokra_x86_64.so
-
-dist/bin/libokra_x86_64.so: $(OBJECTS)
- echo $(OBJECTS)
- echo $(SOURCES)
- mkdir -p dist/bin
- g++ -shared -fPIC -Wl,-soname,libokra_x86_64.so -o $@ $(OBJECTS) $(LIBS)
-
-.cpp.o:
- g++ -c $(CXXFLAGS) -o $@ $<
--------------------------------------------------------
-
-Make sure you transfer the file correctly (Tabs before commands in rules!)
-and to adjust the HSADIR variable to point to the simulator as set up
-from the above section.
-
-After make you should have ended up with a file dist/bin/libokra_x86_64.so.
-The hardware okra wrapper repository contains that file as binary blob,
-together with some other shared libraries that actually implement the
-finalizer for hardware.
-
-Either way you have a libokra_x86_64.so file now which can be dynamically
-loaded by libgomp when it can be found by the dynamic loader (i.e.
-set LD_LIBRARY_PATH or copy to your current directory).
-
-There is one caveat with using the okra wrappers: for HSA kernels only
-the ascii form is accepted at this point, and all kernel must be in a file
-called hsakernel.hsail in the current directory. This restriction will
-be lifted once the HSA spec is finalized to version 1.0 (and the restriction
-of having just one file name might be lifted before).
-
-GCC will directly generate the binary form of HSA kernels in ELF sections
-of the associated .o files. To generate an ASCII file from that you
-need the hsailasm helper program (which can also disassemble), binutils
-and this script:
-
-% cat hsacreatekernel.sh
-#!/bin/sh
-ld -r -o hsakernel.all.o ${1+"$@"}
-objcopy -O elf32-i386 -j .brig_strtab -j .brig_directives \
- -j .brig_code -j .brig_operands -j .brig_debug hsakernel.all.o hsakernel.o
-rm -f hsakernel.all.o
-hsailasm -disassemble hsakernel.o
-
-It takes all object filenames containing BRIG sections, merges them,
-and disassembles this into hsakernel.hsail.
-
-======================= OKRA Example ==============================
-
-This trivial fortran program will square a vector with openMP:
-
-% cat omp_vecsquare.f90
-subroutine vector_square(n, a, b)
- integer i, n, b(n), a(n)
-!$omp parallel do
- do i=1,n
- b(i) = a(i) * a(i)
- enddo
-!$omp end parallel do
-end subroutine vector_square
-
-program main
- integer, parameter :: n=256
- integer i, b(n), a(n)
- do i=1,n
- a(i) = i
- b(i) = 0
- enddo
- call vector_square(n, a, b)
- print *, ' Array B is ', b
-end program main
-
-Do the following (needs the above scripts and helper programs in path):
-
-% p=/path/to/gcc-hsa-branch-installation
-% $p/bin/gfortran -fopenmp -c omp_vecsquare.f90
-% hsacreatekernel.sh omp_vecsquare.o
-% $p/bin/gfortran -fopenmp omp_vecsquare.o -Wl,-rpath,$p/lib64
-% LD_LIBRARY_PATH=... something where libokra_x86_64.so can be found ...
-% ./a.out
-...
-
-If you link against libgcchsa.so (as described above in the first section)
-the okra wrapper will not be used, and instead the HSA simulator directly.
-In that case there's no need for the hsakernel.hsail file (the kernel
-will be load from the BRIG sections in the executable).
+and should get a brief message that it has passed. If you get an
+error "Unable to load libokra_x86_64.so," typically it means the path
+is not quite right. If you got the correct result, you can experiment
+more on your own but be prepare to find a lot of missing functionality
+:-)
@@ -5,7 +5,7 @@
#include "okra.h"
typedef okra_status_t (*okra_get_context_func_t)(okra_context_t**);
-typedef okra_status_t (*okra_kernel_create_func_t)( okra_context_t* ,const char *, const char *,okra_kernel_t **);
+typedef okra_status_t (*okra_kernel_create_from_binary_func_t)(okra_context_t *, const char *, size_t , const char *, okra_kernel_t **);
typedef okra_status_t (*okra_push_pointer_func_t)(okra_kernel_t* , void* );
typedef okra_status_t (*okra_execute_kernel_func_t)(okra_context_t*, okra_kernel_t* , okra_range_t* );
typedef okra_status_t (*okra_clear_args_func_t)(okra_kernel_t* );
@@ -13,32 +13,11 @@ typedef okra_status_t (*okra_dispose_kernel_func_t)(okra_kernel_t*);
static void *okralib;
static okra_get_context_func_t _okra_get_context;
-static okra_kernel_create_func_t _okra_kernel_create;
+static okra_kernel_create_from_binary_func_t _okra_kernel_create_from_binary;
static okra_push_pointer_func_t _okra_push_pointer;
static okra_execute_kernel_func_t _okra_execute_kernel;
static okra_clear_args_func_t _okra_clear_args;
-static char *buildStringFromSourceFile (const char* fname)
-{
- FILE *fp;
- long filesize = 0;
- char *str;
- fp=fopen (fname, "r");
- if (!fp)
- {
- fprintf (stderr, "Unable to open the HSAIL string file %s\n", fname);
- return NULL;
- }
- fseek (fp, 0, SEEK_END);
- filesize = ftell (fp);
- rewind (fp);
- str = (char *) malloc (filesize + 1);
- fread (str, filesize, 1, fp) ;
- str[filesize] = (char) 0;
- fclose (fp);
- return str;
-}
-
/* Returns false on error. */
static bool
loadokra (void)
@@ -53,13 +32,13 @@ loadokra (void)
return false;
}
_okra_get_context = (okra_get_context_func_t) dlsym(okralib, "okra_get_context");
- _okra_kernel_create = (okra_kernel_create_func_t) dlsym(okralib, "okra_create_kernel");
+ _okra_kernel_create_from_binary = (okra_kernel_create_from_binary_func_t)dlsym(okralib, "okra_create_kernel_from_binary");
_okra_push_pointer = (okra_push_pointer_func_t) dlsym(okralib, "okra_push_pointer");
_okra_execute_kernel = (okra_execute_kernel_func_t) dlsym(okralib, "okra_execute_kernel");
_okra_clear_args = (okra_clear_args_func_t) dlsym(okralib, "okra_clear_args");
if (!_okra_get_context
- || !_okra_kernel_create
+ || !_okra_kernel_create_from_binary
|| !_okra_push_pointer
|| !_okra_execute_kernel
|| !_okra_clear_args)
@@ -144,13 +123,14 @@ __hsa_launch_kernel (__hsa_kernel_desc * _kd, __hsa_launch_range *range_p,
}
else
{
- char* hsailStr = NULL;
+ size_t size = 1;
+ const char* pfile;
const char* fileName = _kd->filename;
if (_kd->filename[0] == 0)
- fileName = "hsakernel.hsail";
- hsailStr= buildStringFromSourceFile(fileName);
- status = _okra_kernel_create(context, hsailStr, _kd->name, &kernel);
- free(hsailStr);
+ fileName = "hsakernel.o";
+ pfile = (const char *) fopen (fileName, "rb");
+ status = _okra_kernel_create_from_binary(context, pfile, size, _kd->name, &kernel);
+ fclose((FILE *)pfile);
if (status != OKRA_SUCCESS)
{
fprintf (stderr, "Unable to create Kernel\n");