@@ -11,5 +11,5 @@ if ! test -f .have-configure; then
ln -vs "$T"/source-newlib/newlib "$T"/source-gcc/newlib &&
- rm -f "$T"/install/nvptx-none/usr &&
- mkdir -p "$T"/install/nvptx-none &&
- ln -vs . "$T"/install/nvptx-none/usr &&
+ rm -f "$T"/install/offload-nvptx-none/nvptx-none/usr &&
+ mkdir -p "$T"/install/offload-nvptx-none/nvptx-none &&
+ ln -vs . "$T"/install/offload-nvptx-none/nvptx-none/usr &&
target=$("$T"/source-gcc/config.guess) &&
@@ -32,4 +32,4 @@ if ! test -f .have-configure; then
--with-sysroot=/nvptx-none \
- --with-build-sysroot="$T"/install/nvptx-none \
- --with-build-time-tools="$T"/install/nvptx-none/bin \
+ --with-build-sysroot="$T"/install/offload-nvptx-none/nvptx-none \
+ --with-build-time-tools="$T"/install/offload-nvptx-none/nvptx-none/bin \
--disable-sjlj-exceptions \
@@ -11,3 +11,3 @@ if ! test -f .have-configure; then
--target=nvptx-none \
- --prefix="$T"/install \
+ --prefix="$T"/install/offload-nvptx-none \
--with-cuda-driver-include=$CUDA/targets/x86_64-linux/include \
commit c4e9c60e860e4bd9996df196bee54d52cda64038
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Wed May 13 20:05:52 2015 +0000
nvptx offloading linking
gcc/
* config/nvptx/mkoffload.c (enum Kind, struct Token, enum Vis)
(struct Stmt): Remove.
(read_file, tokenize, write_token, write_tokens, alloc_stmt)
(alloc_comment, append_stmt, rev_stmts, write_stmt, write_stmts)
(parse_insn, parse_list_nosemi, parse_init, parse_file): Remove
functions and macros.
(decls, vars, fns): Remove variables.
(maybe_unlink): Use save_temps rather than debug to keep files.
(tool_cleanup): Unlink ptx_cfile_name and ptx_name.
(read_file): Accept a pointer to a length and store into it.
(process): Don't try to parse the input file, just write it out as a
string, but looking for maps. Also write out the length.
(main): Don't use -S to compile ptx code. Add -lgomp. Add
COLLECT_MKOFFLOAD_OPTIONS. Scan for -fopenacc and produce an empty
image if it is not set. Scan for -save-temps.
* gcc.c (mkoffload_options): New static variable.
(display_help): Mention -Xoffload.
(driver_handle_option): Handle it.
(add_mkoffload_option): New static function.
(set_collect_gcc_options): If offloading, set
COLLECT_MKOFFLOAD_OPTIONS.
* doc/invoke.texi (-Xoffload): Document.
* common.opt (Xoffload): New option.
* gcc.c (process_command): Use spec_machine rather than
spec_host_machine to build tooldir_prefix2.
gcc/fortran/
* gfortranspec.c (lang_specific_driver): Add -Xoffload options to
link -lm and -lgfortran.
libgcc/
* config.host (nvptx-*): For an offloading build, add libgomp.a
and libgomp.spec to extra_parts.
* config/nvptx/t-nvptx (gomp-acc_on_device.o, gomp-tids.o)
(gomp-atomic.o, libgomp.a, libgomp.spec): New rules.
(OBJS_libgomp): New variable.
* config/nvptx/gomp-acc_on_device.c: New file.
* config/nvptx/gomp-atomic.asm: Likewise.
* config/nvptx/gomp-tids.c: Likewise.
libgomp/
* oacc-ptx.h: Remove file.
* plugin/plugin-nvptx.c: Don't include it.
(link_ptx): Accept a length argument. Don't add predefined bits of
PTX code. Look for NUL characters as file boundaries in the input
and link the multiple PTX files.
(GOMP_OFFLOAD_load_image): Get the size of PTX code from the table
and pass it to link_ptx.
* testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Add
"-Xoffload -lgfortran -Xoffload -lm".
* testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags):
Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@223176 138bc75d-0d04-0410-961f-82ee72b054a4
---
gcc/ChangeLog.gomp | 28 +
gcc/common.opt | 3 +
gcc/config/nvptx/mkoffload.c | 766 +++------------------
gcc/doc/invoke.texi | 7 +-
gcc/fortran/ChangeLog.gomp | 5 +
gcc/fortran/gfortranspec.c | 2 +
gcc/gcc.c | 36 +-
libgcc/ChangeLog.gomp | 12 +
libgcc/config.host | 6 +-
libgcc/config/nvptx/gomp-acc_on_device.c | 9 +
libgcc/config/nvptx/gomp-atomic.asm | 37 +
libgcc/config/nvptx/gomp-tids.c | 66 ++
libgcc/config/nvptx/t-nvptx | 13 +
libgomp/ChangeLog.gomp | 16 +
libgomp/oacc-ptx.h | 454 ------------
libgomp/plugin/plugin-nvptx.c | 91 +--
libgomp/testsuite/libgomp.fortran/fortran.exp | 5 +-
libgomp/testsuite/libgomp.oacc-fortran/fortran.exp | 5 +-
18 files changed, 383 insertions(+), 1178 deletions(-)
@@ -1,3 +1,31 @@
+2015-05-13 Bernd Schmidt <bernds@codesourcery.com>
+
+ * config/nvptx/mkoffload.c (enum Kind, struct Token, enum Vis)
+ (struct Stmt): Remove.
+ (read_file, tokenize, write_token, write_tokens, alloc_stmt)
+ (alloc_comment, append_stmt, rev_stmts, write_stmt, write_stmts)
+ (parse_insn, parse_list_nosemi, parse_init, parse_file): Remove
+ functions and macros.
+ (decls, vars, fns): Remove variables.
+ (maybe_unlink): Use save_temps rather than debug to keep files.
+ (tool_cleanup): Unlink ptx_cfile_name and ptx_name.
+ (read_file): Accept a pointer to a length and store into it.
+ (process): Don't try to parse the input file, just write it out as a
+ string, but looking for maps. Also write out the length.
+ (main): Don't use -S to compile ptx code. Add -lgomp. Add
+ COLLECT_MKOFFLOAD_OPTIONS. Scan for -fopenacc and produce an empty
+ image if it is not set. Scan for -save-temps.
+ * gcc.c (mkoffload_options): New static variable.
+ (display_help): Mention -Xoffload.
+ (driver_handle_option): Handle it.
+ (add_mkoffload_option): New static function.
+ (set_collect_gcc_options): If offloading, set
+ COLLECT_MKOFFLOAD_OPTIONS.
+ * doc/invoke.texi (-Xoffload): Document.
+ * common.opt (Xoffload): New option.
+ * gcc.c (process_command): Use spec_machine rather than
+ spec_host_machine to build tooldir_prefix2.
+
2015-05-11 Thomas Schwinge <thomas@codesourcery.com>
Bernd Schmidt <bernds@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
@@ -741,6 +741,9 @@ Driver Separate
Xlinker
Driver Separate
+Xoffload
+Driver Separate
+
Xpreprocessor
Driver Separate
@@ -41,84 +41,12 @@ const char tool_name[] = "nvptx mkoffload";
#define COMMENT_PREFIX "#"
-typedef enum Kind
-{
- /* 0-ff used for single char tokens */
- K_symbol = 0x100, /* a symbol */
- K_label, /* a label defn (i.e. symbol:) */
- K_ident, /* other ident */
- K_dotted, /* dotted identifier */
- K_number,
- K_string,
- K_comment
-} Kind;
-
-typedef struct Token
-{
- unsigned short kind : 12;
- unsigned short space : 1; /* preceded by space */
- unsigned short end : 1; /* succeeded by end of line */
- /* Length of token */
- unsigned short len;
-
- /* Token itself */
- char const *ptr;
-} Token;
-
-/* statement info */
-typedef enum Vis
-{
- V_dot = 0, /* random pseudo */
- V_var = 1, /* var decl/defn */
- V_func = 2, /* func decl/defn */
- V_insn = 3, /* random insn */
- V_label = 4, /* label defn */
- V_comment = 5,
- V_pred = 6, /* predicate */
- V_mask = 0x7,
- V_global = 0x08, /* globalize */
- V_weak = 0x10, /* weakly globalize */
- V_no_eol = 0x20, /* no end of line */
- V_prefix_comment = 0x40 /* prefixed comment */
-} Vis;
-
-typedef struct Stmt
-{
- struct Stmt *next;
- Token *tokens;
- unsigned char vis;
- unsigned len : 12;
- unsigned sym : 12;
-} Stmt;
-
struct id_map
{
id_map *next;
char *ptx_name;
};
-static const char *read_file (FILE *);
-static Token *tokenize (const char *);
-
-static void write_token (FILE *, const Token *);
-static void write_tokens (FILE *, const Token *, unsigned, int);
-
-static Stmt *alloc_stmt (unsigned, Token *, Token *, const Token *);
-#define alloc_comment(S,E) alloc_stmt (V_comment, S, E, 0)
-#define append_stmt(V, S) ((S)->next = *(V), *(V) = (S))
-static Stmt *rev_stmts (Stmt *);
-static void write_stmt (FILE *, const Stmt *);
-static void write_stmts (FILE *, const Stmt *);
-
-static Token *parse_insn (Token *);
-static Token *parse_list_nosemi (Token *);
-static Token *parse_init (Token *);
-static Token *parse_file (Token *);
-
-static Stmt *decls;
-static Stmt *vars;
-static Stmt *fns;
-
static id_map *func_ids, **funcs_tail = &func_ids;
static id_map *var_ids, **vars_tail = &var_ids;
@@ -136,7 +64,7 @@ bool target_ilp32 = false;
void
maybe_unlink (const char *file)
{
- if (! debug)
+ if (!save_temps)
{
if (unlink_if_ordinary (file)
&& errno != ENOENT)
@@ -149,6 +77,10 @@ maybe_unlink (const char *file)
void
tool_cleanup (bool)
{
+ if (ptx_cfile_name)
+ maybe_unlink (ptx_cfile_name);
+ if (ptx_name)
+ maybe_unlink (ptx_name);
}
/* Add or change the value of an environment variable, outputting the
@@ -184,7 +116,7 @@ record_id (const char *p1, id_map ***where)
remember, there could be a NUL in the file itself. */
static const char *
-read_file (FILE *stream)
+read_file (FILE *stream, size_t *plen)
{
size_t alloc = 16384;
size_t base = 0;
@@ -214,557 +146,10 @@ read_file (FILE *stream)
}
}
buffer[base] = 0;
+ *plen = base;
return buffer;
}
-/* Read a token, advancing ptr.
- If we read a comment, append it to the comments block. */
-
-static Token *
-tokenize (const char *ptr)
-{
- unsigned alloc = 1000;
- unsigned num = 0;
- Token *toks = XNEWVEC (Token, alloc);
- int in_comment = 0;
- int not_comment = 0;
-
- for (;; num++)
- {
- const char *base;
- unsigned kind;
- int ws = 0;
- int eol = 0;
-
- again:
- base = ptr;
- if (in_comment)
- goto block_comment;
- switch (kind = *ptr++)
- {
- default:
- break;
-
- case '\n':
- eol = 1;
- /* Fall through */
- case ' ':
- case '\t':
- case '\r':
- case '\v':
- /* White space */
- ws = not_comment;
- goto again;
-
- case '/':
- {
- if (*ptr == '/')
- {
- /* line comment. Do not include trailing \n */
- base += 2;
- for (; *ptr; ptr++)
- if (*ptr == '\n')
- break;
- kind = K_comment;
- }
- else if (*ptr == '*')
- {
- /* block comment */
- base += 2;
- ptr++;
-
- block_comment:
- eol = in_comment;
- in_comment = 1;
- for (; *ptr; ptr++)
- {
- if (*ptr == '\n')
- {
- ptr++;
- break;
- }
- if (ptr[0] == '*' && ptr[1] == '/')
- {
- in_comment = 2;
- ptr += 2;
- break;
- }
- }
- kind = K_comment;
- }
- else
- break;
- }
- break;
-
- case '"':
- /* quoted string */
- kind = K_string;
- while (*ptr)
- if (*ptr == '"')
- {
- ptr++;
- break;
- }
- else if (*ptr++ == '\\')
- ptr++;
- break;
-
- case '.':
- if (*ptr < '0' || *ptr > '9')
- {
- kind = K_dotted;
- ws = not_comment;
- goto ident;
- }
- /* FALLTHROUGH */
- case '0'...'9':
- kind = K_number;
- goto ident;
- break;
-
- case '$': /* local labels. */
- case '%': /* register names, pseudoes etc */
- kind = K_ident;
- goto ident;
-
- case 'a'...'z':
- case 'A'...'Z':
- case '_':
- kind = K_symbol; /* possible symbol name */
- ident:
- for (; *ptr; ptr++)
- {
- if (*ptr >= 'A' && *ptr <= 'Z')
- continue;
- if (*ptr >= 'a' && *ptr <= 'z')
- continue;
- if (*ptr >= '0' && *ptr <= '9')
- continue;
- if (*ptr == '_' || *ptr == '$')
- continue;
- if (*ptr == '.' && kind != K_dotted)
- /* Idents starting with a dot, cannot have internal dots. */
- continue;
- if ((*ptr == '+' || *ptr == '-')
- && kind == K_number
- && (ptr[-1] == 'e' || ptr[-1] == 'E'
- || ptr[-1] == 'p' || ptr[-1] == 'P'))
- /* exponent */
- continue;
- break;
- }
- if (*ptr == ':')
- {
- ptr++;
- kind = K_label;
- }
- break;
- }
-
- if (alloc == num)
- {
- alloc *= 2;
- toks = XRESIZEVEC (Token, toks, alloc);
- }
- Token *tok = toks + num;
-
- tok->kind = kind;
- tok->space = ws;
- tok->end = 0;
- tok->ptr = base;
- tok->len = ptr - base - in_comment;
- in_comment &= 1;
- not_comment = kind != K_comment;
- if (eol && num)
- tok[-1].end = 1;
- if (!kind)
- break;
- }
-
- return toks;
-}
-
-/* Write an encoded token. */
-
-static void
-write_token (FILE *out, Token const *tok)
-{
- if (tok->space)
- fputc (' ', out);
-
- switch (tok->kind)
- {
- case K_string:
- {
- const char *c = tok->ptr + 1;
- size_t len = tok->len - 2;
-
- fputs ("\\\"", out);
- while (len)
- {
- const char *bs = (const char *)memchr (c, '\\', len);
- size_t l = bs ? bs - c : len;
-
- fprintf (out, "%.*s", (int)l, c);
- len -= l;
- c += l;
- if (bs)
- {
- fputs ("\\\\", out);
- len--, c++;
- }
- }
- fputs ("\\\"", out);
- }
- break;
-
- default:
- /* All other tokens shouldn't have anything magic in them */
- fprintf (out, "%.*s", tok->len, tok->ptr);
- break;
- }
- if (tok->end)
- fputs ("\\n", out);
-}
-
-static void
-write_tokens (FILE *out, Token const *toks, unsigned len, int spc)
-{
- fputs ("\t\"", out);
- for (; len--; toks++)
- write_token (out, toks);
- if (spc)
- fputs (" ", out);
- fputs ("\"", out);
-}
-
-static Stmt *
-alloc_stmt (unsigned vis, Token *tokens, Token *end, Token const *sym)
-{
- static unsigned alloc = 0;
- static Stmt *heap = 0;
-
- if (!alloc)
- {
- alloc = 1000;
- heap = XNEWVEC (Stmt, alloc);
- }
-
- Stmt *stmt = heap++;
- alloc--;
-
- tokens->space = 0;
- stmt->next = 0;
- stmt->vis = vis;
- stmt->tokens = tokens;
- stmt->len = end - tokens;
- stmt->sym = sym ? sym - tokens : ~0;
-
- return stmt;
-}
-
-static Stmt *
-rev_stmts (Stmt *stmt)
-{
- Stmt *prev = 0;
- Stmt *next;
-
- while (stmt)
- {
- next = stmt->next;
- stmt->next = prev;
- prev = stmt;
- stmt = next;
- }
-
- return prev;
-}
-
-static void
-write_stmt (FILE *out, const Stmt *stmt)
-{
- if ((stmt->vis & V_mask) != V_comment)
- {
- write_tokens (out, stmt->tokens, stmt->len,
- (stmt->vis & V_mask) == V_pred);
- fputs (stmt->vis & V_no_eol ? "\t" : "\n", out);
- }
-}
-
-static void
-write_stmts (FILE *out, const Stmt *stmts)
-{
- for (; stmts; stmts = stmts->next)
- write_stmt (out, stmts);
-}
-
-static Token *
-parse_insn (Token *tok)
-{
- unsigned depth = 0;
-
- do
- {
- Stmt *stmt;
- Token *sym = 0;
- unsigned s = V_insn;
- Token *start = tok;
-
- switch (tok++->kind)
- {
- case K_comment:
- while (tok->kind == K_comment)
- tok++;
- stmt = alloc_comment (start, tok);
- append_stmt (&fns, stmt);
- continue;
-
- case '{':
- depth++;
- break;
-
- case '}':
- depth--;
- break;
-
- case K_label:
- if (tok[-1].ptr[0] != '$')
- sym = tok - 1;
- tok[-1].end = 1;
- s = V_label;
- break;
-
- case '@':
- tok->space = 0;
- if (tok->kind == '!')
- tok++;
- if (tok->kind == K_symbol)
- sym = tok;
- tok++;
- s = V_pred;
- break;
-
- default:
- for (; tok->kind != ';'; tok++)
- {
- if (tok->kind == ',')
- tok[1].space = 0;
- else if (tok->kind == K_symbol)
- sym = tok;
- }
- tok++->end = 1;
- break;
- }
-
- stmt = alloc_stmt (s, start, tok, sym);
- append_stmt (&fns, stmt);
-
- if (!tok[-1].end && tok[0].kind == K_comment)
- {
- stmt->vis |= V_no_eol;
- stmt = alloc_comment (tok, tok + 1);
- append_stmt (&fns, stmt);
- tok++;
- }
- }
- while (depth);
-
- return tok;
-}
-
-/* comma separated list of tokens */
-
-static Token *
-parse_list_nosemi (Token *tok)
-{
- Token *start = tok;
-
- do
- if (!(++tok)->kind)
- break;
- while ((++tok)->kind == ',');
-
- tok[-1].end = 1;
- Stmt *stmt = alloc_stmt (V_dot, start, tok, 0);
- append_stmt (&decls, stmt);
-
- return tok;
-}
-
-#define is_keyword(T,S) \
- (sizeof (S) == (T)->len && !memcmp ((T)->ptr + 1, (S), (T)->len - 1))
-
-static Token *
-parse_init (Token *tok)
-{
- for (;;)
- {
- Token *start = tok;
- Token const *sym = 0;
- Stmt *stmt;
-
- if (tok->kind == K_comment)
- {
- while (tok->kind == K_comment)
- tok++;
- stmt = alloc_comment (start, tok);
- append_stmt (&vars, stmt);
- start = tok;
- }
-
- if (tok->kind == '{')
- tok[1].space = 0;
- for (; tok->kind != ',' && tok->kind != ';'; tok++)
- if (tok->kind == K_symbol)
- sym = tok;
- tok[1].space = 0;
- int end = tok++->kind == ';';
- stmt = alloc_stmt (V_insn, start, tok, sym);
- append_stmt (&vars, stmt);
- if (!tok[-1].end && tok->kind == K_comment)
- {
- stmt->vis |= V_no_eol;
- stmt = alloc_comment (tok, tok + 1);
- append_stmt (&vars, stmt);
- tok++;
- }
- if (end)
- break;
- }
- return tok;
-}
-
-static Token *
-parse_file (Token *tok)
-{
- Stmt *comment = 0;
-
- if (tok->kind == K_comment)
- {
- Token *start = tok;
-
- while (tok->kind == K_comment)
- {
- if (strncmp (tok->ptr, ":VAR_MAP ", 9) == 0)
- record_id (tok->ptr + 9, &vars_tail);
- if (strncmp (tok->ptr, ":FUNC_MAP ", 10) == 0)
- record_id (tok->ptr + 10, &funcs_tail);
- tok++;
- }
- comment = alloc_comment (start, tok);
- comment->vis |= V_prefix_comment;
- }
-
- if (tok->kind == K_dotted)
- {
- if (is_keyword (tok, "version")
- || is_keyword (tok, "target")
- || is_keyword (tok, "address_size"))
- {
- if (comment)
- append_stmt (&decls, comment);
- tok = parse_list_nosemi (tok);
- }
- else
- {
- unsigned vis = 0;
- const Token *def = 0;
- unsigned is_decl = 0;
- Token *start;
-
- for (start = tok;
- tok->kind && tok->kind != '=' && tok->kind != K_comment
- && tok->kind != '{' && tok->kind != ';'; tok++)
- {
- if (is_keyword (tok, "global")
- || is_keyword (tok, "const"))
- vis |= V_var;
- else if (is_keyword (tok, "func")
- || is_keyword (tok, "entry"))
- vis |= V_func;
- else if (is_keyword (tok, "visible"))
- vis |= V_global;
- else if (is_keyword (tok, "extern"))
- is_decl = 1;
- else if (is_keyword (tok, "weak"))
- vis |= V_weak;
- if (tok->kind == '(')
- {
- tok[1].space = 0;
- tok[0].space = 1;
- }
- else if (tok->kind == ')' && tok[1].kind != ';')
- tok[1].space = 1;
-
- if (tok->kind == K_symbol)
- def = tok;
- }
-
- if (!tok->kind)
- {
- /* end of file */
- if (comment)
- append_stmt (&fns, comment);
- }
- else if (tok->kind == '{'
- || tok->kind == K_comment)
- {
- /* function defn */
- Stmt *stmt = alloc_stmt (vis, start, tok, def);
- if (comment)
- {
- append_stmt (&fns, comment);
- stmt->vis |= V_prefix_comment;
- }
- append_stmt (&fns, stmt);
- tok = parse_insn (tok);
- }
- else
- {
- int assign = tok->kind == '=';
-
- tok++->end = 1;
- if ((vis & V_mask) == V_var && !is_decl)
- {
- /* variable */
- Stmt *stmt = alloc_stmt (vis, start, tok, def);
- if (comment)
- {
- append_stmt (&vars, comment);
- stmt->vis |= V_prefix_comment;
- }
- append_stmt (&vars, stmt);
- if (assign)
- tok = parse_init (tok);
- }
- else
- {
- /* declaration */
- Stmt *stmt = alloc_stmt (vis, start, tok, 0);
- if (comment)
- {
- append_stmt (&decls, comment);
- stmt->vis |= V_prefix_comment;
- }
- append_stmt (&decls, stmt);
- }
- }
- }
- }
- else
- {
- /* Something strange. Ignore it. */
- if (comment)
- append_stmt (&fns, comment);
-
- do
- tok++;
- while (tok->kind && !tok->end);
- }
- return tok;
-}
-
/* Parse STR, saving found tokens into PVALUES and return their number.
Tokens are assumed to be delimited by ':'. */
static unsigned
@@ -840,19 +225,50 @@ access_check (const char *name, int mode)
static void
process (FILE *in, FILE *out)
{
- const char *input = read_file (in);
- Token *tok = tokenize (input);
+ size_t len;
+ const char *input = read_file (in, &len);
+
+ fprintf (out, "static const char ptx_code[] = \n \"");
+ for (size_t i = 0; i < len; i++)
+ {
+ char c = input[i];
+ bool nl = false;
+ switch (c)
+ {
+ case '\0':
+ putc ('\\', out);
+ c = '0';
+ break;
+ case '\r':
+ continue;
+ case '\n':
+ putc ('\\', out);
+ c = 'n';
+ nl = true;
+ break;
+ case '"':
+ case '\\':
+ putc ('\\', out);
+ break;
+
+ case '/':
+ if (strncmp (input + i, "//:VAR_MAP ", 11) == 0)
+ record_id (input + i + 11, &vars_tail);
+ if (strncmp (input + i, "//:FUNC_MAP ", 12) == 0)
+ record_id (input + i + 12, &funcs_tail);
+ break;
+
+ default:
+ break;
+ }
+ putc (c, out);
+ if (nl)
+ fputs ("\"\n\t\"", out);
+ }
+ fprintf (out, "\";\n\n");
+
unsigned int nvars = 0, nfuncs = 0;
- do
- tok = parse_file (tok);
- while (tok->kind);
-
- fprintf (out, "static const char ptx_code[] = \n");
- write_stmts (out, rev_stmts (decls));
- write_stmts (out, rev_stmts (vars));
- write_stmts (out, rev_stmts (fns));
- fprintf (out, ";\n\n");
fprintf (out, "static const char *var_mappings[] = {\n");
for (id_map *id = var_ids; id; id = id->next, nvars++)
fprintf (out, "\t\"%s\"%s\n", id->ptx_name, id->next ? "," : "");
@@ -863,8 +279,9 @@ process (FILE *in, FILE *out)
fprintf (out, "};\n\n");
fprintf (out, "static const void *target_data[] = {\n");
- fprintf (out, " ptx_code, (void*) %u, var_mappings, (void*) %u, "
- "func_mappings\n", nvars, nfuncs);
+ fprintf (out, " ptx_code, (void *)(__UINTPTR_TYPE__)sizeof (ptx_code),\n");
+ fprintf (out, " (void *) %u, var_mappings, (void *) %u, func_mappings\n",
+ nvars, nfuncs);
fprintf (out, "};\n\n");
fprintf (out, "extern void GOMP_offload_register (const void *, int, void *);\n");
@@ -983,47 +400,74 @@ main (int argc, char **argv)
obstack_ptr_grow (&argv_obstack, driver);
obstack_ptr_grow (&argv_obstack, "-xlto");
obstack_ptr_grow (&argv_obstack, target_ilp32 ? "-m32" : "-m64");
- obstack_ptr_grow (&argv_obstack, "-S");
+ obstack_ptr_grow (&argv_obstack, "-lgomp");
+ char *collect_mkoffload_opts = getenv ("COLLECT_MKOFFLOAD_OPTIONS");
+ if (collect_mkoffload_opts)
+ {
+ char *str = collect_mkoffload_opts;
+ char *p;
+ while ((p = strchr (str, ' ')) != 0)
+ {
+ *p = '\0';
+ obstack_ptr_grow (&argv_obstack, str);
+ str = p + 1;
+ }
+ obstack_ptr_grow (&argv_obstack, str);
+ }
+ bool fopenacc = false;
for (int ix = 1; ix != argc; ix++)
{
+ if (!strcmp (argv[ix], "-v"))
+ verbose = true;
+ else if (!strcmp (argv[ix], "-save-temps"))
+ save_temps = true;
+ else if (!strcmp (argv[ix], "-fopenacc"))
+ fopenacc = true;
+
if (!strcmp (argv[ix], "-o") && ix + 1 != argc)
outname = argv[++ix];
else
obstack_ptr_grow (&argv_obstack, argv[ix]);
}
- ptx_name = make_temp_file (".mkoffload");
- obstack_ptr_grow (&argv_obstack, "-o");
- obstack_ptr_grow (&argv_obstack, ptx_name);
- obstack_ptr_grow (&argv_obstack, NULL);
- const char **new_argv = XOBFINISH (&argv_obstack, const char **);
-
- char *execpath = getenv ("GCC_EXEC_PREFIX");
- char *cpath = getenv ("COMPILER_PATH");
- char *lpath = getenv ("LIBRARY_PATH");
- unsetenv ("GCC_EXEC_PREFIX");
- unsetenv ("COMPILER_PATH");
- unsetenv ("LIBRARY_PATH");
-
- fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
- obstack_free (&argv_obstack, NULL);
-
- xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
- xputenv (concat ("COMPILER_PATH=", cpath, NULL));
- xputenv (concat ("LIBRARY_PATH=", lpath, NULL));
-
- in = fopen (ptx_name, "r");
- if (!in)
- fatal_error (input_location, "cannot open intermediate ptx file");
-
ptx_cfile_name = make_temp_file (".c");
out = fopen (ptx_cfile_name, "w");
if (!out)
fatal_error (input_location, "cannot open '%s'", ptx_cfile_name);
- process (in, out);
+ /* We do not support OMP offloading. Don't generate an offload image
+ if we did not see -fopenacc. */
+ if (fopenacc)
+ {
+ ptx_name = make_temp_file (".mkoffload");
+ obstack_ptr_grow (&argv_obstack, "-o");
+ obstack_ptr_grow (&argv_obstack, ptx_name);
+ obstack_ptr_grow (&argv_obstack, NULL);
+ const char **new_argv = XOBFINISH (&argv_obstack, const char **);
+
+ char *execpath = getenv ("GCC_EXEC_PREFIX");
+ char *cpath = getenv ("COMPILER_PATH");
+ char *lpath = getenv ("LIBRARY_PATH");
+ unsetenv ("GCC_EXEC_PREFIX");
+ unsetenv ("COMPILER_PATH");
+ unsetenv ("LIBRARY_PATH");
+
+ fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
+ obstack_free (&argv_obstack, NULL);
+
+ xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
+ xputenv (concat ("COMPILER_PATH=", cpath, NULL));
+ xputenv (concat ("LIBRARY_PATH=", lpath, NULL));
+
+ in = fopen (ptx_name, "r");
+ if (!in)
+ fatal_error (input_location, "cannot open intermediate ptx file");
+
+ process (in, out);
+ }
+
fclose (out);
compile_native (ptx_cfile_name, outname, collect_gcc);
@@ -490,7 +490,7 @@ Objective-C and Objective-C++ Dialects}.
-static-libmpx -static-libmpxwrappers @gol
-shared -shared-libgcc -symbolic @gol
-T @var{script} -Wl,@var{option} -Xlinker @var{option} @gol
--u @var{symbol} -z @var{keyword}}
+-Xoffload @var{option} -u @var{symbol} -z @var{keyword}}
@item Directory Options
@xref{Directory Options,,Options for Directory Search}.
@@ -11404,6 +11404,11 @@ syntax than as separate arguments. For example, you can specify
@option{-Xlinker -Map -Xlinker output.map}. Other linkers may not support
this syntax for command-line options.
+@item -Xoffload @var{option}
+@opindex Xoffload
+Pass @var{option} as an option to the mkoffload program during the linking
+phase. This program is used to generate images for offloaded code.
+
@item -Wl,@var{option}
@opindex Wl
Pass @var{option} as an option to the linker. If @var{option} contains
@@ -1,3 +1,8 @@
+2015-05-13 Bernd Schmidt <bernds@codesourcery.com>
+
+ * gfortranspec.c (lang_specific_driver): Add -Xoffload options to
+ link -lm and -lgfortran.
+
2015-05-11 Thomas Schwinge <thomas@codesourcery.com>
Bernd Schmidt <bernds@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
@@ -403,6 +403,8 @@ For more information about these matters, see the file named COPYING\n\n"));
default:
break;
}
+ append_option (OPT_Xoffload, "-lm", 1);
+ append_option (OPT_Xoffload, "-lgfortran", 1);
}
#ifdef ENABLE_SHARED_LIBGCC
@@ -1184,6 +1184,11 @@ static vec<char_p> assembler_options;
These options are accumulated by -Wp,
and substituted into the preprocessor command with %Z. */
static vec<char_p> preprocessor_options;
+
+/* A vector of options to give to mkoffload.
+ These options are accumulated by -Xoffload and placed in the
+ COLLECT_MKOFFLOAD_OPTIONS variable. */
+static vec<char_p> mkoffload_options;
static char *
skip_whitespace (char *p)
@@ -3202,6 +3207,7 @@ display_help (void)
fputs (_(" -Xassembler <arg> Pass <arg> on to the assembler\n"), stdout);
fputs (_(" -Xpreprocessor <arg> Pass <arg> on to the preprocessor\n"), stdout);
fputs (_(" -Xlinker <arg> Pass <arg> on to the linker\n"), stdout);
+ fputs (_(" -Xoffload <arg> Pass <arg> to mkoffload via an environment variable\n"), stdout);
fputs (_(" -save-temps Do not delete intermediate files\n"), stdout);
fputs (_(" -save-temps=<arg> Do not delete intermediate files\n"), stdout);
fputs (_("\
@@ -3257,6 +3263,12 @@ add_linker_option (const char *option, int len)
{
linker_options.safe_push (save_string (option, len));
}
+
+static void
+add_mkoffload_option (const char *option, int len)
+{
+ mkoffload_options.safe_push (save_string (option, len));
+}
/* Allocate space for an input file in infiles. */
@@ -3696,6 +3708,11 @@ driver_handle_option (struct gcc_options *opts,
do_save = false;
break;
+ case OPT_Xoffload:
+ add_mkoffload_option (arg, strlen (arg));
+ do_save = false;
+ break;
+
case OPT_Xpreprocessor:
add_preprocessor_option (arg, strlen (arg));
do_save = false;
@@ -4266,7 +4283,7 @@ process_command (unsigned int decoded_options_count,
}
gcc_assert (!IS_ABSOLUTE_PATH (tooldir_base_prefix));
- tooldir_prefix2 = concat (tooldir_base_prefix, spec_host_machine,
+ tooldir_prefix2 = concat (tooldir_base_prefix, spec_machine,
dir_separator_str, NULL);
/* Look for tools relative to the location from which the driver is
@@ -4391,6 +4408,23 @@ set_collect_gcc_options (void)
}
obstack_grow (&collect_obstack, "\0", 1);
xputenv (XOBFINISH (&collect_obstack, char *));
+
+#ifdef ENABLE_OFFLOADING
+ /* Build COLLECT_MKOFFLOAD_OPTIONS to have all of the options specified to
+ mkoffload. */
+ obstack_grow (&collect_obstack, "COLLECT_MKOFFLOAD_OPTIONS=",
+ sizeof ("COLLECT_MKOFFLOAD_OPTIONS=") - 1);
+
+ char_p opt;
+ FOR_EACH_VEC_ELT (mkoffload_options, i, opt)
+ {
+ if (i > 0)
+ obstack_grow (&collect_obstack, " ", 1);
+ obstack_grow (&collect_obstack, opt, strlen (opt));
+ }
+ obstack_grow (&collect_obstack, "\0", 1);
+ xputenv (XOBFINISH (&collect_obstack, char *));
+#endif
}
/* Process a spec string, accumulating and running commands. */
@@ -1,3 +1,15 @@
+2015-05-13 Bernd Schmidt <bernds@codesourcery.com>
+ Cesar Philippidis <cesar@codesourcery.com>
+
+ * config.host (nvptx-*): For an offloading build, add libgomp.a
+ and libgomp.spec to extra_parts.
+ * config/nvptx/t-nvptx (gomp-acc_on_device.o, gomp-tids.o)
+ (gomp-atomic.o, libgomp.a, libgomp.spec): New rules.
+ (OBJS_libgomp): New variable.
+ * config/nvptx/gomp-acc_on_device.c: New file.
+ * config/nvptx/gomp-atomic.asm: Likewise.
+ * config/nvptx/gomp-tids.c: Likewise.
+
2014-09-08 Thomas Schwinge <thomas@codesourcery.com>
* configure.ac (enable_accelerator, offload_targets): Remove.
@@ -1292,7 +1292,11 @@ mep*-*-*)
;;
nvptx-*)
tmake_file="$tmake_file nvptx/t-nvptx"
- extra_parts="crt0.o"
+ if test "x${enable_as_accelerator_for}" != x; then
+ extra_parts="crt0.o libgomp.a libgomp.spec"
+ else
+ extra_parts="crt0.o"
+ fi
;;
*)
echo "*** Configuration ${host} not supported" 1>&2
new file mode 100644
@@ -0,0 +1,9 @@
+int acc_on_device(int d) /* Forwards D unchanged to __builtin_acc_on_device and returns its result.  */
+{
+ return __builtin_acc_on_device(d);
+}
+
+int acc_on_device_h_(int *d) /* By-reference variant: dereferences D and calls acc_on_device.  NOTE(review): presumably the Fortran binding -- confirm.  */
+{
+ return acc_on_device(*d);
+}
new file mode 100644
@@ -0,0 +1,37 @@
+
+// BEGIN PREAMBLE
+ .version 3.1
+ .target sm_30
+ .address_size 64
+ .extern .shared .u8 sdata[];
+// END PREAMBLE
+
+// BEGIN VAR DEF: libgomp_ptx_lock
+.global .align 4 .u32 libgomp_ptx_lock;
+
+// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_start
+.visible .func GOMP_atomic_start;
+// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_start
+.visible .func GOMP_atomic_start
+{
+ .reg .pred %p<2>;
+ .reg .s32 %r<2>;
+ .reg .s64 %rd<2>;
+BB5_1:
+ mov.u64 %rd1, libgomp_ptx_lock;
+ atom.global.cas.b32 %r1, [%rd1], 0, 1;
+ setp.ne.s32 %p1, %r1, 0;
+ @%p1 bra BB5_1;
+ ret;
+ }
+// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_end
+.visible .func GOMP_atomic_end;
+// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_end
+.visible .func GOMP_atomic_end
+{
+ .reg .s32 %r<2>;
+ .reg .s64 %rd<2>;
+ mov.u64 %rd1, libgomp_ptx_lock;
+ atom.global.exch.b32 %r1, [%rd1], 0;
+ ret;
+ }
new file mode 100644
@@ -0,0 +1,66 @@
+/* Each gang consists of 'worker' threads. Each worker has 'vector'
+ threads.
+
+ gang, worker and vector mapping functions:
+
+ *tid (0) => vector dimension
+ *tid (1) => worker dimension
+ *ctaid (0) => gang dimension
+
+ FIXME: these functions assume that the gang, worker and vector parameters
+ are 0 or 1. To generalize these functions, we should use -1 to indicate,
+ say, that a gang clause was used without its optional argument. In this
+ case, gang should correspond to ctaid(0), i.e., the num_gangs parameter
+ passed to cuLaunchKernel.
+
+ tid = [0, ntid-1]
+ ntid = [1...threads_per_dimension]
+*/
+
+/* Return the number of threads spanned by the selected dimensions: the
+   product of ntid (0) if VECTOR, ntid (1) if WORKER and nctaid (0) if GANG
+   is nonzero.  Return 1 when no dimension is selected.  */
+int __attribute__ ((used))
+GOACC_get_num_threads (int gang, int worker, int vector)
+{
+  int size = 1;
+
+  if (vector)
+    size *= __builtin_GOACC_ntid (0);
+
+  if (worker)
+    size *= __builtin_GOACC_ntid (1);
+
+  if (gang)
+    size *= __builtin_GOACC_nctaid (0);
+
+  return size;
+}
+
+/* Return the linear index of the calling thread within the dimensions
+   selected by nonzero GANG, WORKER and VECTOR; 0 when none is selected.
+   The index is built outermost-first: each selected dimension scales the
+   running index by its extent and then adds the thread's position in it.  */
+int __attribute__ ((used))
+GOACC_get_thread_num (int gang, int worker, int vector)
+{
+  int worker_extent = __builtin_GOACC_ntid (1);
+  int vector_extent = __builtin_GOACC_ntid (0);
+  int gang_pos = __builtin_GOACC_ctaid (0);
+  int worker_pos = __builtin_GOACC_tid (1);
+  int vector_pos = __builtin_GOACC_tid (0);
+  int index = 0;
+
+  /* Gang is the outermost dimension, so it seeds the index.  */
+  if (gang)
+    index = gang_pos;
+
+  /* Fold in the worker position, then the vector position.  */
+  if (worker)
+    index = index * worker_extent + worker_pos;
+
+  if (vector)
+    index = index * vector_extent + vector_pos;
+
+  return index;
+}
@@ -12,3 +12,16 @@ crt0.o: $(srcdir)/config/nvptx/crt0.s
# support it, and it may cause the build to fail, because of alloca usage, for
# example.
INHIBIT_LIBC_CFLAGS = -Dinhibit_libc
+
+gomp-acc_on_device.o: $(srcdir)/config/nvptx/gomp-acc_on_device.c
+ $(gcc_compile) -c -fno-builtin-acc_on_device $<
+gomp-tids.o: $(srcdir)/config/nvptx/gomp-tids.c
+ $(gcc_compile) -c -fopenacc -O $<
+gomp-atomic.o: $(srcdir)/config/nvptx/gomp-atomic.asm
+ cp $< $@
+
+OBJS_libgomp= gomp-acc_on_device.o gomp-tids.o gomp-atomic.o
+libgomp.a: $(OBJS_libgomp)
+ $(AR_CREATE_FOR_TARGET) $@ $(OBJS_libgomp)
+libgomp.spec:
+ echo "*link_gomp: -lgomp" >$@
@@ -1,3 +1,19 @@
+2015-05-13 Thomas Schwinge <thomas@codesourcery.com>
+ Bernd Schmidt <bernds@codesourcery.com>
+ Cesar Philippidis <cesar@codesourcery.com>
+
+ * oacc-ptx.h: Remove file.
+ * plugin/plugin-nvptx.c: Don't include it.
+ (link_ptx): Accept a length argument. Don't add predefined bits of
+ PTX code. Look for NUL characters as file boundaries in the input
+ and link the multiple PTX files.
+ (GOMP_OFFLOAD_load_image): Get the size of PTX code from the table
+ and pass it to link_ptx.
+ * testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Add
+ "-Xoffload -lgfortran -Xoffload -lm".
+ * testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags):
+ Likewise.
+
2015-05-11 Thomas Schwinge <thomas@codesourcery.com>
Bernd Schmidt <bernds@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
deleted file mode 100644
@@ -1,454 +0,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
- Contributed by Mentor Embedded.
-
- This file is part of the GNU Offloading and Multi Processing Library
- (libgomp).
-
- Libgomp is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#define ABORT_PTX \
- ".version 3.1\n" \
- ".target sm_30\n" \
- ".address_size 64\n" \
- ".visible .func abort;\n" \
- ".visible .func abort\n" \
- "{\n" \
- "trap;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func _gfortran_abort;\n" \
- ".visible .func _gfortran_abort\n" \
- "{\n" \
- "trap;\n" \
- "ret;\n" \
- "}\n" \
-
-/* Generated with:
-
- $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline
-*/
-#define ACC_ON_DEVICE_PTX \
- " .version 3.1\n" \
- " .target sm_30\n" \
- " .address_size 64\n" \
- ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \
- ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \
- "{\n" \
- " .reg.u32 %ar1;\n" \
- ".reg.u32 %retval;\n" \
- " .reg.u64 %hr10;\n" \
- " .reg.u32 %r24;\n" \
- " .reg.u32 %r25;\n" \
- " .reg.pred %r27;\n" \
- " .reg.u32 %r30;\n" \
- " ld.param.u32 %ar1, [%in_ar1];\n" \
- " mov.u32 %r24, %ar1;\n" \
- " setp.ne.u32 %r27,%r24,4;\n" \
- " set.u32.eq.u32 %r30,%r24,5;\n" \
- " neg.s32 %r25, %r30;\n" \
- " @%r27 bra $L3;\n" \
- " mov.u32 %r25, 1;\n" \
- "$L3:\n" \
- " mov.u32 %retval, %r25;\n" \
- " st.param.u32 [%out_retval], %retval;\n" \
- " ret;\n" \
- " }\n" \
- ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \
- ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \
- "{\n" \
- " .reg.u64 %ar1;\n" \
- ".reg.u32 %retval;\n" \
- " .reg.u64 %hr10;\n" \
- " .reg.u64 %r25;\n" \
- " .reg.u32 %r26;\n" \
- " .reg.u32 %r27;\n" \
- " ld.param.u64 %ar1, [%in_ar1];\n" \
- " mov.u64 %r25, %ar1;\n" \
- " ld.u32 %r26, [%r25];\n" \
- " {\n" \
- " .param.u32 %retval_in;\n" \
- " {\n" \
- " .param.u32 %out_arg0;\n" \
- " st.param.u32 [%out_arg0], %r26;\n" \
- " call (%retval_in), acc_on_device, (%out_arg0);\n" \
- " }\n" \
- " ld.param.u32 %r27, [%retval_in];\n" \
- "}\n" \
- " mov.u32 %retval, %r27;\n" \
- " st.param.u32 [%out_retval], %retval;\n" \
- " ret;\n" \
- " }"
-
- #define GOACC_INTERNAL_PTX \
- ".version 3.1\n" \
- ".target sm_30\n" \
- ".address_size 64\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
- ".extern .func abort;\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" \
- "{\n" \
- ".reg .u32 %ar1;\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- ".reg .pred %r30;\n" \
- ".reg .u32 %r31;\n" \
- ".reg .pred %r32;\n" \
- ".reg .u32 %r33;\n" \
- ".reg .pred %r34;\n" \
- ".local .align 8 .b8 %frame[4];\n" \
- "ld.param.u32 %ar1,[%in_ar1];\n" \
- "mov.u32 %r27,%ar1;\n" \
- "st.local.u32 [%frame],%r27;\n" \
- "ld.local.u32 %r28,[%frame];\n" \
- "mov.u32 %r29,1;\n" \
- "setp.eq.u32 %r30,%r28,%r29;\n" \
- "@%r30 bra $L4;\n" \
- "mov.u32 %r31,2;\n" \
- "setp.eq.u32 %r32,%r28,%r31;\n" \
- "@%r32 bra $L5;\n" \
- "mov.u32 %r33,0;\n" \
- "setp.eq.u32 %r34,%r28,%r33;\n" \
- "@!%r34 bra $L8;\n" \
- "mov.u32 %r23,%tid.x;\n" \
- "mov.u32 %r22,%r23;\n" \
- "bra $L7;\n" \
- "$L4:\n" \
- "mov.u32 %r24,%tid.y;\n" \
- "mov.u32 %r22,%r24;\n" \
- "bra $L7;\n" \
- "$L5:\n" \
- "mov.u32 %r25,%tid.z;\n" \
- "mov.u32 %r22,%r25;\n" \
- "bra $L7;\n" \
- "$L8:\n" \
- "{\n" \
- "{\n" \
- "call abort;\n" \
- "}\n" \
- "}\n" \
- "$L7:\n" \
- "mov.u32 %r26,%r22;\n" \
- "mov.u32 %retval,%r26;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" \
- "{\n" \
- ".reg .u32 %ar1;\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- ".reg .pred %r30;\n" \
- ".reg .u32 %r31;\n" \
- ".reg .pred %r32;\n" \
- ".reg .u32 %r33;\n" \
- ".reg .pred %r34;\n" \
- ".local .align 8 .b8 %frame[4];\n" \
- "ld.param.u32 %ar1,[%in_ar1];\n" \
- "mov.u32 %r27,%ar1;\n" \
- "st.local.u32 [%frame],%r27;\n" \
- "ld.local.u32 %r28,[%frame];\n" \
- "mov.u32 %r29,1;\n" \
- "setp.eq.u32 %r30,%r28,%r29;\n" \
- "@%r30 bra $L11;\n" \
- "mov.u32 %r31,2;\n" \
- "setp.eq.u32 %r32,%r28,%r31;\n" \
- "@%r32 bra $L12;\n" \
- "mov.u32 %r33,0;\n" \
- "setp.eq.u32 %r34,%r28,%r33;\n" \
- "@!%r34 bra $L15;\n" \
- "mov.u32 %r23,%ntid.x;\n" \
- "mov.u32 %r22,%r23;\n" \
- "bra $L14;\n" \
- "$L11:\n" \
- "mov.u32 %r24,%ntid.y;\n" \
- "mov.u32 %r22,%r24;\n" \
- "bra $L14;\n" \
- "$L12:\n" \
- "mov.u32 %r25,%ntid.z;\n" \
- "mov.u32 %r22,%r25;\n" \
- "bra $L14;\n" \
- "$L15:\n" \
- "{\n" \
- "{\n" \
- "call abort;\n" \
- "}\n" \
- "}\n" \
- "$L14:\n" \
- "mov.u32 %r26,%r22;\n" \
- "mov.u32 %retval,%r26;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" \
- "{\n" \
- ".reg .u32 %ar1;\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- ".reg .pred %r30;\n" \
- ".reg .u32 %r31;\n" \
- ".reg .pred %r32;\n" \
- ".reg .u32 %r33;\n" \
- ".reg .pred %r34;\n" \
- ".local .align 8 .b8 %frame[4];\n" \
- "ld.param.u32 %ar1,[%in_ar1];\n" \
- "mov.u32 %r27,%ar1;\n" \
- "st.local.u32 [%frame],%r27;\n" \
- "ld.local.u32 %r28,[%frame];\n" \
- "mov.u32 %r29,1;\n" \
- "setp.eq.u32 %r30,%r28,%r29;\n" \
- "@%r30 bra $L18;\n" \
- "mov.u32 %r31,2;\n" \
- "setp.eq.u32 %r32,%r28,%r31;\n" \
- "@%r32 bra $L19;\n" \
- "mov.u32 %r33,0;\n" \
- "setp.eq.u32 %r34,%r28,%r33;\n" \
- "@!%r34 bra $L22;\n" \
- "mov.u32 %r23,%ctaid.x;\n" \
- "mov.u32 %r22,%r23;\n" \
- "bra $L21;\n" \
- "$L18:\n" \
- "mov.u32 %r24,%ctaid.y;\n" \
- "mov.u32 %r22,%r24;\n" \
- "bra $L21;\n" \
- "$L19:\n" \
- "mov.u32 %r25,%ctaid.z;\n" \
- "mov.u32 %r22,%r25;\n" \
- "bra $L21;\n" \
- "$L22:\n" \
- "{\n" \
- "{\n" \
- "call abort;\n" \
- "}\n" \
- "}\n" \
- "$L21:\n" \
- "mov.u32 %r26,%r22;\n" \
- "mov.u32 %retval,%r26;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" \
- "{\n" \
- ".reg .u32 %ar1;\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- ".reg .pred %r30;\n" \
- ".reg .u32 %r31;\n" \
- ".reg .pred %r32;\n" \
- ".reg .u32 %r33;\n" \
- ".reg .pred %r34;\n" \
- ".local .align 8 .b8 %frame[4];\n" \
- "ld.param.u32 %ar1,[%in_ar1];\n" \
- "mov.u32 %r27,%ar1;\n" \
- "st.local.u32 [%frame],%r27;\n" \
- "ld.local.u32 %r28,[%frame];\n" \
- "mov.u32 %r29,1;\n" \
- "setp.eq.u32 %r30,%r28,%r29;\n" \
- "@%r30 bra $L25;\n" \
- "mov.u32 %r31,2;\n" \
- "setp.eq.u32 %r32,%r28,%r31;\n" \
- "@%r32 bra $L26;\n" \
- "mov.u32 %r33,0;\n" \
- "setp.eq.u32 %r34,%r28,%r33;\n" \
- "@!%r34 bra $L29;\n" \
- "mov.u32 %r23,%nctaid.x;\n" \
- "mov.u32 %r22,%r23;\n" \
- "bra $L28;\n" \
- "$L25:\n" \
- "mov.u32 %r24,%nctaid.y;\n" \
- "mov.u32 %r22,%r24;\n" \
- "bra $L28;\n" \
- "$L26:\n" \
- "mov.u32 %r25,%nctaid.z;\n" \
- "mov.u32 %r22,%r25;\n" \
- "bra $L28;\n" \
- "$L29:\n" \
- "{\n" \
- "{\n" \
- "call abort;\n" \
- "}\n" \
- "}\n" \
- "$L28:\n" \
- "mov.u32 %r26,%r22;\n" \
- "mov.u32 %retval,%r26;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" \
- "{\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- "mov.u32 %r26,0;\n" \
- "{\n" \
- ".param .u32 %retval_in;\n" \
- "{\n" \
- ".param .u32 %out_arg0;\n" \
- "st.param.u32 [%out_arg0],%r26;\n" \
- "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \
- "}\n" \
- "ld.param.u32 %r27,[%retval_in];\n" \
- "}\n" \
- "mov.u32 %r22,%r27;\n" \
- "mov.u32 %r28,0;\n" \
- "{\n" \
- ".param .u32 %retval_in;\n" \
- "{\n" \
- ".param .u32 %out_arg0;\n" \
- "st.param.u32 [%out_arg0],%r28;\n" \
- "call (%retval_in),GOACC_nctaid,(%out_arg0);\n" \
- "}\n" \
- "ld.param.u32 %r29,[%retval_in];\n" \
- "}\n" \
- "mov.u32 %r23,%r29;\n" \
- "mul.lo.u32 %r24,%r22,%r23;\n" \
- "mov.u32 %r25,%r24;\n" \
- "mov.u32 %retval,%r25;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" \
- "{\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- ".reg .u32 %r30;\n" \
- ".reg .u32 %r31;\n" \
- ".reg .u32 %r32;\n" \
- ".reg .u32 %r33;\n" \
- "mov.u32 %r28,0;\n" \
- "{\n" \
- ".param .u32 %retval_in;\n" \
- "{\n" \
- ".param .u32 %out_arg0;\n" \
- "st.param.u32 [%out_arg0],%r28;\n" \
- "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \
- "}\n" \
- "ld.param.u32 %r29,[%retval_in];\n" \
- "}\n" \
- "mov.u32 %r22,%r29;\n" \
- "mov.u32 %r30,0;\n" \
- "{\n" \
- ".param .u32 %retval_in;\n" \
- "{\n" \
- ".param .u32 %out_arg0;\n" \
- "st.param.u32 [%out_arg0],%r30;\n" \
- "call (%retval_in),GOACC_ctaid,(%out_arg0);\n" \
- "}\n" \
- "ld.param.u32 %r31,[%retval_in];\n" \
- "}\n" \
- "mov.u32 %r23,%r31;\n" \
- "mul.lo.u32 %r24,%r22,%r23;\n" \
- "mov.u32 %r32,0;\n" \
- "{\n" \
- ".param .u32 %retval_in;\n" \
- "{\n" \
- ".param .u32 %out_arg0;\n" \
- "st.param.u32 [%out_arg0],%r32;\n" \
- "call (%retval_in),GOACC_tid,(%out_arg0);\n" \
- "}\n" \
- "ld.param.u32 %r33,[%retval_in];\n" \
- "}\n" \
- "mov.u32 %r25,%r33;\n" \
- "add.u32 %r26,%r24,%r25;\n" \
- "mov.u32 %r27,%r26;\n" \
- "mov.u32 %retval,%r27;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n"
-
- #define GOMP_ATOMIC_PTX \
- ".version 3.1\n" \
- ".target sm_30\n" \
- ".address_size 64\n" \
- ".global .align 4 .u32 libgomp_ptx_lock;\n" \
- ".visible .func GOMP_atomic_start;\n" \
- ".visible .func GOMP_atomic_start\n" \
- "{\n" \
- " .reg .pred %p<2>;\n" \
- " .reg .s32 %r<2>;\n" \
- " .reg .s64 %rd<2>;\n" \
- "BB5_1:\n" \
- " mov.u64 %rd1, libgomp_ptx_lock;\n" \
- " atom.global.cas.b32 %r1, [%rd1], 0, 1;\n" \
- " setp.ne.s32 %p1, %r1, 0;\n" \
- " @%p1 bra BB5_1;\n" \
- " ret;\n" \
- "}\n" \
- ".visible .func GOMP_atomic_end;\n" \
- ".visible .func GOMP_atomic_end\n" \
- "{\n" \
- " .reg .s32 %r<2>;\n" \
- " .reg .s64 %rd<2>;\n" \
- " mov.u64 %rd1, libgomp_ptx_lock;\n" \
- " atom.global.exch.b32 %r1, [%rd1], 0;\n" \
- " ret;\n" \
- "}\n"
@@ -34,7 +34,6 @@
#include "openacc.h"
#include "config.h"
#include "libgomp-plugin.h"
-#include "oacc-ptx.h"
#include "oacc-plugin.h"
#include <pthread.h>
@@ -793,7 +792,7 @@ nvptx_get_num_devices (void)
static void
-link_ptx (CUmodule *module, char *ptx_code)
+link_ptx (CUmodule *module, char *ptx_code, size_t length)
{
CUjit_option opts[7];
void *optvals[7];
@@ -834,63 +833,38 @@ link_ptx (CUmodule *module, char *ptx_code)
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));
- char *abort_ptx = ABORT_PTX;
- r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, abort_ptx,
- strlen (abort_ptx) + 1, 0, 0, 0, 0);
- if (r != CUDA_SUCCESS)
- {
- GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
- GOMP_PLUGIN_fatal ("cuLinkAddData (abort) error: %s", cuda_error (r));
- }
-
- char *acc_on_device_ptx = ACC_ON_DEVICE_PTX;
- r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, acc_on_device_ptx,
- strlen (acc_on_device_ptx) + 1, 0, 0, 0, 0);
- if (r != CUDA_SUCCESS)
- {
- GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
- GOMP_PLUGIN_fatal ("cuLinkAddData (acc_on_device) error: %s",
- cuda_error (r));
- }
-
- char *goacc_internal_ptx = GOACC_INTERNAL_PTX;
- r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, goacc_internal_ptx,
- strlen (goacc_internal_ptx) + 1, 0, 0, 0, 0);
- if (r != CUDA_SUCCESS)
- {
- GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
- GOMP_PLUGIN_fatal ("cuLinkAddData (goacc_internal_ptx) error: %s",
- cuda_error (r));
- }
-
- char *gomp_atomic_ptx = GOMP_ATOMIC_PTX;
- r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, gomp_atomic_ptx,
- strlen (gomp_atomic_ptx) + 1, 0, 0, 0, 0);
- if (r != CUDA_SUCCESS)
- {
- GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
- GOMP_PLUGIN_fatal ("cuLinkAddData (gomp_atomic_ptx) error: %s",
- cuda_error (r));
- }
-
- r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code,
- strlen (ptx_code) + 1, 0, 0, 0, 0);
- if (r != CUDA_SUCCESS)
- {
- GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
- GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s", cuda_error (r));
+  size_t off = 0;  /* Offset of the next PTX file in PTX_CODE.  */
+  while (off < length)
+    {
+      size_t l = strlen (ptx_code + off);  /* Length without the NUL.  */
+      r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code + off, l + 1,
+                         0, 0, 0, 0);
+      if (r != CUDA_SUCCESS)
+        {
+          GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
+          GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s", cuda_error (r));
+        }
+      /* Files are separated by NULs; advance to the start of the next.  */
+      off += l;
+      while (off < length && ptx_code[off] == '\0')
+        off++;
}
r = cuLinkComplete (linkstate, &linkout, &linkoutsize);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
GOMP_PLUGIN_debug (0, "Link complete: %fms\n", elapsed);
GOMP_PLUGIN_debug (0, "Link log %s\n", &ilog[0]);
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
+
r = cuModuleLoadData (module, linkout);
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuModuleLoadData error: %s", cuda_error (r));
+
+  r = cuLinkDestroy (linkstate);  /* Done linking; free LINKSTATE.  */
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLinkDestroy error: %s", cuda_error (r));
}
static void
@@ -1633,7 +1607,7 @@ GOMP_OFFLOAD_load_image (int ord, void *target_data,
nvptx_attach_host_thread_to_device (ord);
- link_ptx (&module, img_header[0]);
+ link_ptx (&module, img_header[0], (size_t) img_header[1]);
pthread_mutex_lock (&ptx_image_lock);
new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
@@ -1647,18 +1621,19 @@ GOMP_OFFLOAD_load_image (int ord, void *target_data,
each offload image:
img_header[0] -> ptx code
- img_header[1] -> number of variables
- img_header[2] -> array of variable names (pointers to strings)
- img_header[3] -> number of kernels
- img_header[4] -> array of kernel names (pointers to strings)
+ img_header[1] -> size of ptx code
+ img_header[2] -> number of variables
+ img_header[3] -> array of variable names (pointers to strings)
+ img_header[4] -> number of kernels
+ img_header[5] -> array of kernel names (pointers to strings)
The array of kernel names and the functions addresses form a
one-to-one correspondence. */
- var_entries = (uintptr_t) img_header[1];
- var_names = (char **) img_header[2];
- fn_entries = (uintptr_t) img_header[3];
- fn_names = (char **) img_header[4];
+ var_entries = (uintptr_t) img_header[2];
+ var_names = (char **) img_header[3];
+ fn_entries = (uintptr_t) img_header[4];
+ fn_names = (char **) img_header[5];
*target_table = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
* (fn_entries + var_entries));
@@ -7,7 +7,10 @@ global ALWAYS_CFLAGS
set shlib_ext [get_shlib_extension]
set lang_library_path "../libgfortran/.libs"
-set lang_link_flags "-lgfortran"
+#TODO
+# We're not using the gfortran driver, so have to mimic its behavior
+# here.
+set lang_link_flags "-lgfortran -Xoffload -lgfortran -Xoffload -lm"
if [info exists lang_include_flags] then {
unset lang_include_flags
}
@@ -9,7 +9,10 @@ global ALWAYS_CFLAGS
set shlib_ext [get_shlib_extension]
set lang_library_path "../libgfortran/.libs"
-set lang_link_flags "-lgfortran"
+#TODO
+# We're not using the gfortran driver, so have to mimic its behavior
+# here.
+set lang_link_flags "-lgfortran -Xoffload -lgfortran -Xoffload -lm"
if [info exists lang_include_flags] then {
unset lang_include_flags
}