diff mbox series

[v3] gcn/mkoffload.cc: Use #embed for including the generated ELF file

Message ID 8e92be89-e9f0-457c-962a-e8804c59b7ad@baylibre.com
State New
Headers show
Series [v3] gcn/mkoffload.cc: Use #embed for including the generated ELF file | expand

Commit Message

Tobias Burnus July 19, 2024, 2:57 p.m. UTC
Hi,

Jakub Jelinek wrote:
>> +	   "#if defined(__STDC_EMBED_FOUND__) && __has_embed (\"%s\") == __STDC_EMBED_FOUND__\n"
> If this was an attempt to deal gracefully with no #embed support, then
> the above would be wrong and should have been
> #if defined(__STDC_EMBED_FOUND__) && defined(__has_embed)
> #if __has_embed ("whatever") == __STDC_EMBED_FOUND__

I was kind of both – assuming that #embed is available (as it should be 
compiled by the accompanied compiler) but handle the case that it is not.

However, as '#embed' is well diagnosed if unsupported, that part is not 
really needed.

> Now, if all you want is an error if the file doesn't exist, then
> #embed "whatever"
> will do that too […]
>
> If you want an error not just when it doesn't exist, but also when it
> is empty, then you could do
> #embed "whatever" if_empty (%%%)

The idea was to also error out if the file is empty – as that shouldn't 
happen here: if offloading code was found, the code gen should be done. 
However, using an invalid expression seems to be a good idea as that's 
really a special case that shouldn't happen.

* * *

I have additionally replaced the #include by __UINTPTR_TYPE__ and 
__SIZE_TYPE__ to avoid including 3 header files; this doesn't have a 
large effect, but still.

Updated patch attached.

OK for mainline, once Jakub's #embed is committed?

* * *

BTW: Testing shows for a hello world program (w/o #embed patch)

For -foffload=...: 'disable' 0.04s, 'nvptx-none' 0.15s, 'amdgcn-amdhsa' 
1.2s.

With a simple #embed (this patch plus Jakub's first patch), the 
performance is unchanged. I then applied Jakub's follow up patches, but 
I then get an ICE (Jakub will have a look).

But compiling it with 'g++' (→ COLLECT_GCC is g++) works; result: takes 
0.2s (~6× faster) and compiling for both nvptx and gcn takes 0.3s, 
nearly 5× faster.

Tobias

Comments

Tobias Burnus Sept. 13, 2024, 2:24 p.m. UTC | #1
On July 19, 2024 Tobias Burnus wrote:
>
> Updated patch attached.
>
As #embed is now supported by GCC (thanks!), I could commit this patch :-)

Committed as r15-3629-g508ef585243d46 → 
https://gcc.gnu.org/r15-3629-g508ef585243d46

Unless I missed something, we need to wait for a few pending patches 
before there is a real speed up. However, first, that will come then 
automatically to GCN compilations and, secondly, the generated code is 
already much nicer thanks to #embed + seems to be a tiny tiny bit faster 
already.

Tobias
Jakub Jelinek Sept. 13, 2024, 2:29 p.m. UTC | #2
On Fri, Sep 13, 2024 at 04:24:47PM +0200, Tobias Burnus wrote:
> As #embed is now supported by GCC (thanks!), I could commit this patch :-)

Thanks to Joseph for the reviews.

> Committed as r15-3629-g508ef585243d46 →
> https://gcc.gnu.org/r15-3629-g508ef585243d46
> 
> Unless I missed something, we need to wait for a few pending patches before
> there is a real speed up. However, first, that will come then automatically
> to GCN compilations and, secondly, the generated code is already much nicer
> thanks to #embed + seems to be a tiny tiny bit faster already.

Indeed, only the "dumb" support is in now including the 2 extensions,
so it will work, just not will speed things up (at least not significantly).
The rest whenever it is reviewed (and adjusted according to patch review).

	Jakub
diff mbox series

Patch

 gcn/mkoffload.cc: Use #embed for including the generated ELF file

gcc/ChangeLog:

	* config/gcn/mkoffload.cc (read_file): Remove.
	(process_asm): Do not add '#include' to generated C file.
	(process_obj): Generate C file that uses #embed and use
	__SIZE_TYPE__ and __UINTPTR_TYPE__ instead the #include-defined
	size_t and uintptr.
	(main): Update call to it; remove no longer needed file I/O.

 gcc/config/gcn/mkoffload.cc | 79 +++++++--------------------------------------
 1 file changed, 12 insertions(+), 67 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 810298a799b..c3c998639ff 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -182,44 +182,6 @@  xputenv (const char *string)
   putenv (CONST_CAST (char *, string));
 }
 
-/* Read the whole input file.  It will be NUL terminated (but
-   remember, there could be a NUL in the file itself.  */
-
-static const char *
-read_file (FILE *stream, size_t *plen)
-{
-  size_t alloc = 16384;
-  size_t base = 0;
-  char *buffer;
-
-  if (!fseek (stream, 0, SEEK_END))
-    {
-      /* Get the file size.  */
-      long s = ftell (stream);
-      if (s >= 0)
-	alloc = s + 100;
-      fseek (stream, 0, SEEK_SET);
-    }
-  buffer = XNEWVEC (char, alloc);
-
-  for (;;)
-    {
-      size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
-
-      if (!n)
-	break;
-      base += n;
-      if (base + 1 == alloc)
-	{
-	  alloc *= 2;
-	  buffer = XRESIZEVEC (char, buffer, alloc);
-	}
-    }
-  buffer[base] = 0;
-  *plen = base;
-  return buffer;
-}
-
 /* Parse STR, saving found tokens into PVALUES and return their number.
    Tokens are assumed to be delimited by ':'.  */
 
@@ -657,10 +619,6 @@  process_asm (FILE *in, FILE *out, FILE *cfile)
   struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *);
   struct regcount *regcounts = XOBFINISH (&regcounts_os, struct regcount *);
 
-  fprintf (cfile, "#include <stdlib.h>\n");
-  fprintf (cfile, "#include <stdint.h>\n");
-  fprintf (cfile, "#include <stdbool.h>\n\n");
-
   fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
   fprintf (cfile, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count);
 
@@ -725,35 +683,28 @@  process_asm (FILE *in, FILE *out, FILE *cfile)
 /* Embed an object file into a C source file.  */
 
 static void
-process_obj (FILE *in, FILE *cfile, uint32_t omp_requires)
+process_obj (const char *fname_in, FILE *cfile, uint32_t omp_requires)
 {
-  size_t len = 0;
-  const char *input = read_file (in, &len);
-
   /* Dump out an array containing the binary.
-     FIXME: do this with objcopy.  */
-  fprintf (cfile, "static unsigned char gcn_code[] = {");
-  for (size_t i = 0; i < len; i += 17)
-    {
-      fprintf (cfile, "\n\t");
-      for (size_t j = i; j < i + 17 && j < len; j++)
-	fprintf (cfile, "%3u,", (unsigned char) input[j]);
-    }
-  fprintf (cfile, "\n};\n\n");
+     If the file is empty, a parse error is shown as the argument to is_empty
+     is an undeclared identifier.  */
+  fprintf (cfile,
+	   "static unsigned char gcn_code[] = {\n"
+	   "#embed \"%s\" if_empty (error_file_is_empty)\n"
+	   "};\n\n", fname_in);
 
   fprintf (cfile,
 	   "static const struct gcn_image {\n"
-	   "  size_t size;\n"
+	   "  __SIZE_TYPE__ size;\n"
 	   "  void *image;\n"
 	   "} gcn_image = {\n"
-	   "  %zu,\n"
+	   "  sizeof(gcn_code),\n"
 	   "  gcn_code\n"
-	   "};\n\n",
-	   len);
+	   "};\n\n");
 
   fprintf (cfile,
 	   "static const struct gcn_data {\n"
-	   "  uintptr_t omp_requires_mask;\n"
+	   "  __UINTPTR_TYPE__ omp_requires_mask;\n"
 	   "  const struct gcn_image *gcn_image;\n"
 	   "  unsigned kernel_count;\n"
 	   "  const struct hsa_kernel_description *kernel_infos;\n"
@@ -1312,13 +1263,7 @@  main (int argc, char **argv)
       fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true, ".ld_args");
       obstack_free (&ld_argv_obstack, NULL);
 
-      in = fopen (gcn_o_name, "r");
-      if (!in)
-	fatal_error (input_location, "cannot open intermediate gcn obj file");
-
-      process_obj (in, cfile, omp_requires);
-
-      fclose (in);
+      process_obj (gcn_o_name, cfile, omp_requires);
 
       xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
       xputenv (concat ("COMPILER_PATH=", cpath, NULL));