diff mbox series

libcpp, v3: Add support for gnu::base64 #embed parameter

Message ID Zr3RO9BBeOZ/5BsB@tucnak
State New
Headers show
Series libcpp, v3: Add support for gnu::base64 #embed parameter | expand

Commit Message

Jakub Jelinek Aug. 15, 2024, 9:58 a.m. UTC
Hi!

Here is a new version of the gnu::base64 parameter support; the only changes
are in its use of the EMBED_PARAMS registry of parameters.

2024-08-15  Jakub Jelinek  <jakub@redhat.com>

libcpp/
	* internal.h (struct cpp_embed_params): Add base64 member.
	(_cpp_free_embed_params_tokens): Declare.
	* directives.cc (DIRECTIVE_TABLE): Add IN_I flag to T_EMBED.
	(save_token_for_embed, _cpp_free_embed_params_tokens): New functions.
	(EMBED_PARAMS): Add gnu::base64 entry.
	(_cpp_parse_embed_params): Parse gnu::base64 parameter.  If
	-fpreprocessed without -fdirectives-only, require #embed to have
	gnu::base64 parameter.  Diagnose conflict between gnu::base64 and
	limit or gnu::offset parameters.
	(do_embed): Use _cpp_free_embed_params_tokens.
	* files.cc (finish_embed, base64_dec_fn): New functions.
	(base64_dec): New array.
	(B64D0, B64D1, B64D2, B64D3): Define.
	(finish_base64_embed): New function.
	(_cpp_stack_embed): Use finish_embed.  Handle params->base64
	using finish_base64_embed.
	* macro.cc (builtin_has_embed): Call _cpp_free_embed_params_tokens.
gcc/
	* doc/cpp.texi (Binary Resource Inclusion): Document gnu::base64
	parameter.
gcc/testsuite/
	* c-c++-common/cpp/embed-17.c: New test.
	* c-c++-common/cpp/embed-18.c: New test.
	* c-c++-common/cpp/embed-19.c: New test.
	* gcc.dg/cpp/embed-6.c: New test.
	* gcc.dg/cpp/embed-7.c: New test.



	Jakub

Comments

Joseph Myers Aug. 20, 2024, 5:23 p.m. UTC | #1
On Thu, 15 Aug 2024, Jakub Jelinek wrote:

> +#embed __FILE__ gnu::base64(1) prefix() suffix() /* { dg-error "expected character string literal" } */
> +#embed __FILE__ gnu::base64() prefix() suffix() /* { dg-error "expected character string literal" } */

Maybe also test this error with a non-character string literal (a wide 
string, etc.).  I think the code will already correctly reject wide 
strings here, but it would be good to test it.
diff mbox series

Patch

--- libcpp/internal.h.jj	2024-08-15 11:26:00.726026264 +0200
+++ libcpp/internal.h	2024-08-15 11:35:50.559664877 +0200
@@ -631,7 +631,7 @@  struct cpp_embed_params
   location_t loc;
   bool has_embed;
   cpp_num_part limit, offset;
-  cpp_embed_params_tokens prefix, suffix, if_empty;
+  cpp_embed_params_tokens prefix, suffix, if_empty, base64;
 };
 
 /* Character classes.  Based on the more primitive macros in safe-ctype.h.
@@ -805,6 +805,7 @@  extern void _cpp_restore_pragma_names (c
 extern int _cpp_do__Pragma (cpp_reader *, location_t);
 extern void _cpp_init_directives (cpp_reader *);
 extern void _cpp_init_internal_pragmas (cpp_reader *);
+extern void _cpp_free_embed_params_tokens (cpp_embed_params_tokens *);
 extern bool _cpp_parse_embed_params (cpp_reader *, struct cpp_embed_params *);
 extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
 				 linenum_type, unsigned int);
--- libcpp/directives.cc.jj	2024-08-15 11:39:49.476685559 +0200
+++ libcpp/directives.cc	2024-08-15 11:49:30.107446324 +0200
@@ -159,7 +159,7 @@  static void cpp_pop_definition (cpp_read
   D(error,	T_ERROR,	STDC89,    0)				\
   D(pragma,	T_PRAGMA,	STDC89,    IN_I)			\
   D(warning,	T_WARNING,	STDC23,    0)				\
-  D(embed,	T_EMBED,	STDC23,    INCL | EXPAND)		\
+  D(embed,	T_EMBED,	STDC23,    IN_I | INCL | EXPAND)	\
   D(include_next, T_INCLUDE_NEXT, EXTENSION, INCL | EXPAND)		\
   D(ident,	T_IDENT,	EXTENSION, IN_I)			\
   D(import,	T_IMPORT,	EXTENSION, INCL | EXPAND)  /* ObjC */	\
@@ -932,6 +932,50 @@  do_include_next (cpp_reader *pfile)
   do_include_common (pfile, type);
 }
 
+/* Helper function for skip_balanced_token_seq and _cpp_parse_embed_params.
+   Save one token *TOKEN into *SAVE.  */
+
+static void
+save_token_for_embed (cpp_embed_params_tokens *save, const cpp_token *token)
+{
+  if (save->count == 0)
+    {
+      _cpp_init_tokenrun (&save->base_run, 4);
+      save->cur_run = &save->base_run;
+      save->cur_token = save->base_run.base;
+    }
+  else if (save->cur_token == save->cur_run->limit)
+    {
+      save->cur_run->next = XNEW (tokenrun);
+      save->cur_run->next->prev = save->cur_run;
+      _cpp_init_tokenrun (save->cur_run->next, 4);
+      save->cur_run = save->cur_run->next;
+      save->cur_token = save->cur_run->base;
+    }
+  *save->cur_token = *token;
+  save->cur_token->flags |= NO_EXPAND;
+  save->cur_token++;
+  save->count++;
+}
+
+/* Free memory associated with saved tokens in *SAVE.  */
+
+void
+_cpp_free_embed_params_tokens (cpp_embed_params_tokens *save)
+{
+  if (save->count == 0)
+    return;
+  tokenrun *n;
+  for (tokenrun *t = &save->base_run; t; t = n)
+    {
+      n = t->next;
+      XDELETEVEC (t->base);
+      if (t != &save->base_run)
+	XDELETE (t);
+    }
+  save->count = 0;
+}
+
 /* Skip over balanced preprocessing tokens until END is found.
    If SAVE is non-NULL, remember the parsed tokens in it.  NESTED is
    false in the outermost invocation of the function and true
@@ -961,26 +1005,7 @@  skip_balanced_token_seq (cpp_reader *pfi
       if (save
 	  && (token->type != CPP_PADDING || save->count)
 	  && (token->type != end || nested))
-	{
-	  if (save->count == 0)
-	    {
-	      _cpp_init_tokenrun (&save->base_run, 4);
-	      save->cur_run = &save->base_run;
-	      save->cur_token = save->base_run.base;
-	    }
-	  else if (save->cur_token == save->cur_run->limit)
-	    {
-	      save->cur_run->next = XNEW (tokenrun);
-	      save->cur_run->next->prev = save->cur_run;
-	      _cpp_init_tokenrun (save->cur_run->next, 4);
-	      save->cur_run = save->cur_run->next;
-	      save->cur_token = save->cur_run->base;
-	    }
-	  *save->cur_token = *token;
-	  save->cur_token->flags |= NO_EXPAND;
-	  save->cur_token++;
-	  save->count++;
-	}
+	save_token_for_embed (save, token);
       if (token->type == end)
 	return;
       switch (token->type)
@@ -1015,6 +1040,7 @@  skip_balanced_token_seq (cpp_reader *pfi
   EMBED_PARAM (PREFIX, "prefix")	\
   EMBED_PARAM (SUFFIX, "suffix")	\
   EMBED_PARAM (IF_EMPTY, "if_empty")	\
+  EMBED_PARAM (GNU_BASE64, "base64")	\
   EMBED_PARAM (GNU_OFFSET, "offset")
 
 enum embed_param_kind {
@@ -1058,12 +1084,33 @@  _cpp_parse_embed_params (cpp_reader *pfi
 		  cpp_error (pfile, CPP_DL_ERROR, "expected ')'");
 		  return false;
 		}
-	      return ret;
 	    }
-	  else if (token->type == CPP_CLOSE_PAREN && params->has_embed)
-	    return ret;
-	  cpp_error (pfile, CPP_DL_ERROR, "expected parameter name");
-	  return false;
+	  else if (token->type != CPP_CLOSE_PAREN || !params->has_embed)
+	    {
+	      cpp_error (pfile, CPP_DL_ERROR, "expected parameter name");
+	      return false;
+	    }
+	  if (params->base64.count
+	      && (seen & ((1 << EMBED_PARAM_LIMIT)
+			  | (1 << EMBED_PARAM_GNU_OFFSET))) != 0)
+	    {
+	      ret = false;
+	      if (!params->has_embed)
+		cpp_error_with_line (pfile, CPP_DL_ERROR,
+				     params->base64.base_run.base->src_loc, 0,
+				     "'gnu::base64' parameter conflicts with "
+				     "'limit' or 'gnu::offset' parameters");
+	    }
+	  else if (params->base64.count == 0
+		   && CPP_OPTION (pfile, preprocessed))
+	    {
+	      ret = false;
+	      if (!params->has_embed)
+		cpp_error_with_line (pfile, CPP_DL_ERROR, params->loc, 0,
+				     "'gnu::base64' parameter required in "
+				     "preprocessed source");
+	    }
+	  return ret;
 	}
       param_name = NODE_NAME (token->val.node.spelling);
       param_name_len = NODE_LEN (token->val.node.spelling);
@@ -1188,6 +1235,53 @@  _cpp_parse_embed_params (cpp_reader *pfi
 	    }
  	  token = _cpp_get_token_no_padding (pfile);
 	}
+      else if (param_kind == EMBED_PARAM_GNU_BASE64)
+	{
+	  token = _cpp_get_token_no_padding (pfile);
+	  while (token->type == CPP_OTHER
+		 && CPP_OPTION (pfile, preprocessed)
+		 && !CPP_OPTION (pfile, directives_only)
+		 && token->val.str.len == 1
+		 && token->val.str.text[0] == '\\')
+	    {
+	      /* Allow backslash newline inside of gnu::base64 argument
+		 for -fpreprocessed, so that it doesn't have to be
+		 megabytes long line.  */
+	      pfile->state.in_directive = 0;
+	      token = _cpp_get_token_no_padding (pfile);
+	      pfile->state.in_directive = 3;
+	    }
+	  if (token->type == CPP_STRING)
+	    {
+	      do
+		{
+		  save_token_for_embed (&params->base64, token);
+		  token = _cpp_get_token_no_padding (pfile);
+		  while (token->type == CPP_OTHER
+			 && CPP_OPTION (pfile, preprocessed)
+			 && !CPP_OPTION (pfile, directives_only)
+			 && token->val.str.len == 1
+			 && token->val.str.text[0] == '\\')
+		    {
+		      pfile->state.in_directive = 0;
+		      token = _cpp_get_token_no_padding (pfile);
+		      pfile->state.in_directive = 3;
+		    }
+		}
+	      while (token->type == CPP_STRING);
+	      if (token->type != CPP_CLOSE_PAREN)
+		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+				     "expected ')'");
+	    }
+	  else
+	    {
+	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+				   "expected character string literal");
+	      if (token->type != CPP_CLOSE_PAREN)
+		token = _cpp_get_token_no_padding (pfile);
+	    }
+	  token = _cpp_get_token_no_padding (pfile);
+	}
       else if (token->type == CPP_OPEN_PAREN)
 	{
 	  cpp_embed_params_tokens *save = NULL;
@@ -1268,26 +1362,10 @@  do_embed (cpp_reader *pfile)
   if (ok)
     _cpp_stack_embed (pfile, fname, angle_brackets, &params);
 
-  for (int i = 0; i < 3; ++i)
-    {
-      cpp_embed_params_tokens *p;
-      if (i == 0)
-	p = &params.prefix;
-      else if (i == 1)
-	p = &params.suffix;
-      else
-	p = &params.if_empty;
-      if (p->count == 0)
-	continue;
-      tokenrun *n;
-      for (tokenrun *t = &p->base_run; t; t = n)
-	{
-	  n = t->next;
-	  XDELETEVEC (t->base);
-	  if (t != &p->base_run)
-	    XDELETE (t);
-	}
-    }
+  _cpp_free_embed_params_tokens (&params.prefix);
+  _cpp_free_embed_params_tokens (&params.suffix);
+  _cpp_free_embed_params_tokens (&params.if_empty);
+  _cpp_free_embed_params_tokens (&params.base64);
 
  done:
   XDELETEVEC (fname);
--- libcpp/files.cc.jj	2024-08-15 11:26:00.727026251 +0200
+++ libcpp/files.cc	2024-08-15 11:35:50.561664852 +0200
@@ -1221,6 +1221,320 @@  cpp_probe_header_unit (cpp_reader *pfile
   return nullptr;
 }
 
+/* Helper function for _cpp_stack_embed.  Finish #embed/__has_embed processing
+   after a file is found and data loaded into buffer.  */
+
+static int
+finish_embed (cpp_reader *pfile, _cpp_file *file,
+	      struct cpp_embed_params *params)
+{
+  const uchar *buffer = file->buffer;
+  size_t limit = file->limit;
+  if (params->offset - file->offset > limit)
+    limit = 0;
+  else
+    {
+      buffer += params->offset - file->offset;
+      limit -= params->offset - file->offset;
+    }
+  if (params->limit < limit)
+    limit = params->limit;
+
+  /* For sizes larger than say 64 bytes, this is just a temporary
+     solution, we should emit a single new token which the FEs will
+     handle as an optimization.  */
+  size_t max = INTTYPE_MAXIMUM (size_t) / sizeof (cpp_token);
+  if (limit > max / 2
+      || (limit
+	  ? (params->prefix.count > max
+	     || params->suffix.count > max
+	     || (limit * 2 - 1 + params->prefix.count
+		 + params->suffix.count > max))
+	  : params->if_empty.count > max))
+    {
+      cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+		    "%s is too large", file->path);
+      return 0;
+    }
+
+  size_t len = 0;
+  for (size_t i = 0; i < limit; ++i)
+    {
+      if (buffer[i] < 10)
+	len += 2;
+      else if (buffer[i] < 100)
+	len += 3;
+#if UCHAR_MAX == 255
+      else
+	len += 4;
+#else
+      else if (buffer[i] < 1000)
+	len += 4;
+      else
+	{
+	  char buf[64];
+	  len += sprintf (buf, "%d", buffer[i]) + 1;
+	}
+#endif
+      if (len > INTTYPE_MAXIMUM (ssize_t))
+	{
+	  cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+			"%s is too large", file->path);
+	  return 0;
+	}
+    }
+  uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL;
+  _cpp_buff *tok_buff = NULL;
+  cpp_token *toks = NULL, *tok = &pfile->directive_result;
+  size_t count = 0;
+  if (limit)
+    count = (params->prefix.count + limit * 2 - 1
+	     + params->suffix.count) - 1;
+  else if (params->if_empty.count)
+    count = params->if_empty.count - 1;
+  if (count)
+    {
+      tok_buff = _cpp_get_buff (pfile, count * sizeof (cpp_token));
+      toks = (cpp_token *) tok_buff->base;
+    }
+  cpp_embed_params_tokens *prefix
+    = limit ? &params->prefix : &params->if_empty;
+  if (prefix->count)
+    {
+      *tok = *prefix->base_run.base;
+      tok = toks;
+      tokenrun *cur_run = &prefix->base_run;
+      while (cur_run)
+	{
+	  size_t cnt = (cur_run->next ? cur_run->limit
+			: prefix->cur_token) - cur_run->base;
+	  cpp_token *t = cur_run->base;
+	  if (cur_run == &prefix->base_run)
+	    {
+	      t++;
+	      cnt--;
+	    }
+	  memcpy (tok, t, cnt * sizeof (cpp_token));
+	  tok += cnt;
+	  cur_run = cur_run->next;
+	}
+    }
+  for (size_t i = 0; i < limit; ++i)
+    {
+      tok->src_loc = params->loc;
+      tok->type = CPP_NUMBER;
+      tok->flags = NO_EXPAND;
+      if (i == 0)
+	tok->flags |= PREV_WHITE;
+      tok->val.str.text = s;
+      tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]);
+      s += tok->val.str.len + 1;
+      if (tok == &pfile->directive_result)
+	tok = toks;
+      else
+	tok++;
+      if (i < limit - 1)
+	{
+	  tok->src_loc = params->loc;
+	  tok->type = CPP_COMMA;
+	  tok->flags = NO_EXPAND;
+	  tok++;
+	}
+    }
+  if (limit && params->suffix.count)
+    {
+      tokenrun *cur_run = &params->suffix.base_run;
+      cpp_token *orig_tok = tok;
+      while (cur_run)
+	{
+	  size_t cnt = (cur_run->next ? cur_run->limit
+			: params->suffix.cur_token) - cur_run->base;
+	  cpp_token *t = cur_run->base;
+	  memcpy (tok, t, cnt * sizeof (cpp_token));
+	  tok += cnt;
+	  cur_run = cur_run->next;
+	}
+      orig_tok->flags |= PREV_WHITE;
+    }
+  pfile->directive_result.flags |= PREV_WHITE;
+  if (count)
+    {
+      _cpp_push_token_context (pfile, NULL, toks, count);
+      pfile->context->buff = tok_buff;
+    }
+  return limit ? 1 : 2;
+}
+
+/* Helper function for initialization of base64_dec table.
+   Can't rely on ASCII compatibility, so check each letter
+   separately.  */
+
+constexpr signed char
+base64_dec_fn (unsigned char c)
+{
+  return (c == 'A' ? 0 : c == 'B' ? 1 : c == 'C' ? 2 : c == 'D' ? 3
+	  : c == 'E' ? 4 : c == 'F' ? 5 : c == 'G' ? 6 : c == 'H' ? 7
+	  : c == 'I' ? 8 : c == 'J' ? 9 : c == 'K' ? 10 : c == 'L' ? 11
+	  : c == 'M' ? 12 : c == 'N' ? 13 : c == 'O' ? 14 : c == 'P' ? 15
+	  : c == 'Q' ? 16 : c == 'R' ? 17 : c == 'S' ? 18 : c == 'T' ? 19
+	  : c == 'U' ? 20 : c == 'V' ? 21 : c == 'W' ? 22 : c == 'X' ? 23
+	  : c == 'Y' ? 24 : c == 'Z' ? 25
+	  : c == 'a' ? 26 : c == 'b' ? 27 : c == 'c' ? 28 : c == 'd' ? 29
+	  : c == 'e' ? 30 : c == 'f' ? 31 : c == 'g' ? 32 : c == 'h' ? 33
+	  : c == 'i' ? 34 : c == 'j' ? 35 : c == 'k' ? 36 : c == 'l' ? 37
+	  : c == 'm' ? 38 : c == 'n' ? 39 : c == 'o' ? 40 : c == 'p' ? 41
+	  : c == 'q' ? 42 : c == 'r' ? 43 : c == 's' ? 44 : c == 't' ? 45
+	  : c == 'u' ? 46 : c == 'v' ? 47 : c == 'w' ? 48 : c == 'x' ? 49
+	  : c == 'y' ? 50 : c == 'z' ? 51
+	  : c == '0' ? 52 : c == '1' ? 53 : c == '2' ? 54 : c == '3' ? 55
+	  : c == '4' ? 56 : c == '5' ? 57 : c == '6' ? 58 : c == '7' ? 59
+	  : c == '8' ? 60 : c == '9' ? 61 : c == '+' ? 62 : c == '/' ? 63
+	  : -1);
+}
+
+/* base64 decoding table.  */
+
+static constexpr signed char base64_dec[] = {
+#define B64D0(x) base64_dec_fn (x)
+#define B64D1(x) B64D0 (x), B64D0 (x + 1), B64D0 (x + 2), B64D0 (x + 3)
+#define B64D2(x) B64D1 (x), B64D1 (x + 4), B64D1 (x + 8), B64D1 (x + 12)
+#define B64D3(x) B64D2 (x), B64D2 (x + 16), B64D2 (x + 32), B64D2 (x + 48)
+  B64D3 (0), B64D3 (64), B64D3 (128), B64D3 (192)
+};
+
+/* Helper function for _cpp_stack_embed.  Handle #embed/__has_embed with
+   gnu::base64 parameter.  */
+
+static int
+finish_base64_embed (cpp_reader *pfile, const char *fname, bool angle,
+		     struct cpp_embed_params *params)
+{
+  size_t len, end, i, j, base64_len = 0, cnt;
+  uchar *buf = NULL, *q, pbuf[4], qbuf[3];
+  const uchar *base64_str;
+  if (angle || strcmp (fname, "."))
+    {
+      if (!params->has_embed)
+	cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+		      "'gnu::base64' parameter can be only used with \".\"");
+      return 0;
+    }
+  tokenrun *cur_run = &params->base64.base_run;
+  cpp_token *tend, *tok;
+  while (cur_run)
+    {
+      tend = cur_run->next ? cur_run->limit : params->base64.cur_token;
+      for (tok = cur_run->base; tok < tend; ++tok)
+	{
+	  if (tok->val.str.len < 2
+	      || tok->val.str.text[0] != '"'
+	      || tok->val.str.text[tok->val.str.len - 1] != '"')
+	    {
+	    fail:
+	      cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+			    "'gnu::base64' argument not valid base64 "
+			    "encoded string");
+	      free (buf);
+	      return 0;
+	    }
+	  if (tok->val.str.len - 2 > (~(size_t) 0) - base64_len)
+	    goto fail;
+	  base64_len += tok->val.str.len - 2;
+	}
+      cur_run = cur_run->next;
+    }
+  if ((base64_len & 3) != 0)
+    goto fail;
+  len = base64_len / 4 * 3;
+  end = len;
+
+  if (params->has_embed)
+    q = qbuf;
+  else
+    {
+      buf = XNEWVEC (uchar, len ? len : 1);
+      q = buf;
+    }
+  cur_run = &params->base64.base_run;
+  tend = cur_run->next ? cur_run->limit : params->base64.cur_token;
+  tok = cur_run->base;
+  base64_str = tok->val.str.text + 1;
+  cnt = tok->val.str.len - 2;
+  ++tok;
+  for (i = 0; i < end; i += 3)
+    {
+      for (j = 0; j < 4; ++j)
+	{
+	  while (cnt == 0)
+	    {
+	      if (tok == tend)
+		{
+		  cur_run = cur_run->next;
+		  tend = (cur_run->next ? cur_run->limit
+			  : params->base64.cur_token);
+		  tok = cur_run->base;
+		}
+	      base64_str = tok->val.str.text + 1;
+	      cnt = tok->val.str.len - 2;
+	      ++tok;
+	    }
+	  pbuf[j] = *base64_str;
+	  base64_str++;
+	  --cnt;
+	}
+      if (pbuf[3] == '=' && i + 3 >= end)
+	{
+	  end = len - 3;
+	  --len;
+	  if (pbuf[2] == '=')
+	    --len;
+	  break;
+	}
+      int a = base64_dec[pbuf[0]];
+      int b = base64_dec[pbuf[1]];
+      int c = base64_dec[pbuf[2]];
+      int d = base64_dec[pbuf[3]];
+      if (a == -1 || b == -1 || c == -1 || d == -1)
+	goto fail;
+      q[0] = (a << 2) | (b >> 4);
+      q[1] = (b << 4) | (c >> 2);
+      q[2] = (c << 6) | d;
+      if (!params->has_embed)
+	q += 3;
+    }
+  if (len != end)
+    {
+      int a = base64_dec[pbuf[0]];
+      int b = base64_dec[pbuf[1]];
+      if (a == -1 || b == -1)
+	goto fail;
+      q[0] = (a << 2) | (b >> 4);
+      if (len - end == 2)
+	{
+	  int c = base64_dec[pbuf[2]];
+	  if (c == -1)
+	    goto fail;
+	  q[1] = (b << 4) | (c >> 2);
+	  if ((c & 3) != 0)
+	    goto fail;
+	}
+      else if ((b & 15) != 0)
+	goto fail;
+    }
+  if (params->has_embed)
+    return len ? 1 : 2;
+  _cpp_file *file = make_cpp_file (NULL, "");
+  file->embed = 1;
+  file->next_file = pfile->all_files;
+  pfile->all_files = file;
+  params->limit = -1;
+  params->offset = 0;
+  file->limit = len;
+  file->buffer = buf;
+  file->path = xstrdup ("<base64>");
+  return finish_embed (pfile, file, params);
+}
+
 /* Try to load FNAME with #embed/__has_embed parameters PARAMS.
    If !PARAMS->has_embed, return new token in pfile->directive_result
    (first token) and rest in a pushed non-macro context.
@@ -1231,6 +1545,8 @@  int
 _cpp_stack_embed (cpp_reader *pfile, const char *fname, bool angle,
 		  struct cpp_embed_params *params)
 {
+  if (params->base64.count)
+    return finish_base64_embed (pfile, fname, angle, params);
   cpp_dir *dir = search_path_head (pfile, fname, angle, IT_EMBED,
 				   params->has_embed);
   if (!dir)
@@ -1450,141 +1766,7 @@  _cpp_stack_embed (cpp_reader *pfile, con
       return limit && params->limit ? 1 : 2;
     }
 
-  const uchar *buffer = file->buffer;
-  size_t limit = file->limit;
-  if (params->offset - file->offset > limit)
-    limit = 0;
-  else
-    {
-      buffer += params->offset - file->offset;
-      limit -= params->offset - file->offset;
-    }
-  if (params->limit < limit)
-    limit = params->limit;
-
-  /* For sizes larger than say 64 bytes, this is just a temporary
-     solution, we should emit a single new token which the FEs will
-     handle as an optimization.  */
-  size_t max = INTTYPE_MAXIMUM (size_t) / sizeof (cpp_token);
-  if (limit > max / 2
-      || (limit
-	  ? (params->prefix.count > max
-	     || params->suffix.count > max
-	     || (limit * 2 + params->prefix.count
-		 + params->suffix.count > max))
-	  : params->if_empty.count > max))
-    {
-      cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
-		    "%s is too large", file->path);
-      return 0;
-    }
-
-  size_t len = 0;
-  for (size_t i = 0; i < limit; ++i)
-    {
-      if (buffer[i] < 10)
-	len += 2;
-      else if (buffer[i] < 100)
-	len += 3;
-#if UCHAR_MAX == 255
-      else
-	len += 4;
-#else
-      else if (buffer[i] < 1000)
-	len += 4;
-      else
-	{
-	  char buf[64];
-	  len += sprintf (buf, "%d", buffer[i]) + 1;
-	}
-#endif
-      if (len > INTTYPE_MAXIMUM (ssize_t))
-	{
-	  cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
-			"%s is too large", file->path);
-	  return 0;
-	}
-    }
-  uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL;
-  _cpp_buff *tok_buff = NULL;
-  cpp_token *toks = NULL, *tok = &pfile->directive_result;
-  size_t count = 0;
-  if (limit)
-    count = (params->prefix.count + limit * 2 - 1
-	     + params->suffix.count) - 1;
-  else if (params->if_empty.count)
-    count = params->if_empty.count - 1;
-  if (count)
-    {
-      tok_buff = _cpp_get_buff (pfile, count * sizeof (cpp_token));
-      toks = (cpp_token *) tok_buff->base;
-    }
-  cpp_embed_params_tokens *prefix
-    = limit ? &params->prefix : &params->if_empty;
-  if (prefix->count)
-    {
-      *tok = *prefix->base_run.base;
-      tok = toks;
-      tokenrun *cur_run = &prefix->base_run;
-      while (cur_run)
-	{
-	  size_t cnt = (cur_run->next ? cur_run->limit
-			: prefix->cur_token) - cur_run->base;
-	  cpp_token *t = cur_run->base;
-	  if (cur_run == &prefix->base_run)
-	    {
-	      t++;
-	      cnt--;
-	    }
-	  memcpy (tok, t, cnt * sizeof (cpp_token));
-	  tok += cnt;
-	  cur_run = cur_run->next;
-	}
-    }
-  for (size_t i = 0; i < limit; ++i)
-    {
-      tok->src_loc = params->loc;
-      tok->type = CPP_NUMBER;
-      tok->flags = NO_EXPAND;
-      if (i == 0)
-	tok->flags |= PREV_WHITE;
-      tok->val.str.text = s;
-      tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]);
-      s += tok->val.str.len + 1;
-      if (tok == &pfile->directive_result)
-	tok = toks;
-      else
-	tok++;
-      if (i < limit - 1)
-	{
-	  tok->src_loc = params->loc;
-	  tok->type = CPP_COMMA;
-	  tok->flags = NO_EXPAND;
-	  tok++;
-	}
-    }
-  if (limit && params->suffix.count)
-    {
-      tokenrun *cur_run = &params->suffix.base_run;
-      cpp_token *orig_tok = tok;
-      while (cur_run)
-	{
-	  size_t cnt = (cur_run->next ? cur_run->limit
-			: params->suffix.cur_token) - cur_run->base;
-	  cpp_token *t = cur_run->base;
-	  memcpy (tok, t, cnt * sizeof (cpp_token));
-	  tok += cnt;
-	  cur_run = cur_run->next;
-	}
-      orig_tok->flags |= PREV_WHITE;
-    }
-  pfile->directive_result.flags |= PREV_WHITE;
-  if (count)
-    {
-      _cpp_push_token_context (pfile, NULL, toks, count);
-      pfile->context->buff = tok_buff;
-    }
-  return limit ? 1 : 2;
+  return finish_embed (pfile, file, params);
 }
 
 /* Retrofit the just-entered main file asif it was an include.  This
--- libcpp/macro.cc.jj	2024-08-15 10:29:44.532063800 +0200
+++ libcpp/macro.cc	2024-08-15 11:35:50.562664840 +0200
@@ -505,6 +505,8 @@  builtin_has_embed (cpp_reader *pfile)
       if (ok && !pfile->state.skip_eval)
 	result = _cpp_stack_embed (pfile, fname, bracket, &params);
 
+      _cpp_free_embed_params_tokens (&params.base64);
+
       XDELETEVEC (fname);
     }
   else if (paren)
--- gcc/doc/cpp.texi.jj	2024-08-15 11:26:00.728026239 +0200
+++ gcc/doc/cpp.texi	2024-08-15 11:35:50.562664840 +0200
@@ -3967,7 +3967,8 @@  with currently supported standard parame
 @code{suffix} and @code{if_empty}, or implementation defined parameters
 specified by a unique vendor prefix followed by @code{::} followed by
 name of the parameter.  GCC uses the @code{gnu} prefix for vendor
-parameters and currently supports the @code{gnu::offset} parameter.
+parameters and currently supports the @code{gnu::offset} and
+@code{gnu::base64} parameters.
 
 The @code{limit} parameter argument is a constant expression which
 specifies the maximum number of bytes included by the directive,
@@ -3981,6 +3982,17 @@  The @code{gnu::offset} parameter argumen
 which specifies how many bytes to skip from the start of the resource.
 @code{limit} is then counted from that position.
 
+The @code{gnu::base64} parameter argument is a possibly concatenated
+character string literal with base64 encoded data.  See
+@uref{https://datatracker.ietf.org/doc/html/rfc4648#section-4}.  There
+should be no newlines in the string literal and, because this parameter
+is meant primarily for use by the preprocessor itself, there is no support
+for any escape sequences in the string literal argument.  If the
+@code{gnu::base64} parameter is specified, the @code{limit} and
+@code{gnu::offset} parameters should not be specified and the filename
+should always be @code{"."}.  Instead of reading a file, the directive
+will decode the base64 encoded data and use that as the data to include.
+
 The @code{#embed} directive is not supported in the Traditional Mode
 (@pxref{Traditional Mode}).
 
--- gcc/testsuite/c-c++-common/cpp/embed-17.c.jj	2024-08-15 11:35:50.563664827 +0200
+++ gcc/testsuite/c-c++-common/cpp/embed-17.c	2024-08-15 11:35:50.563664827 +0200
@@ -0,0 +1,116 @@ 
+/* { dg-do run } */
+/* { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir" } */
+/* { dg-additional-options "-std=gnu99" { target c } } */
+
+#if __has_embed ("." gnu::base64 ("")) != __STDC_EMBED_EMPTY__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::base64 ("SA==")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." prefix(-) suffix (-) if_empty (-) __gnu__::__base64__ ("SA==")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::__base64__ ("SGU=")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::__base64__ ("SGVs")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." __gnu__::base64 ("SGVsbG8=")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+/* M. Tulli Ciceronis De Finibus Bonorum et Malorum.  Liber Primus.  */
+/* echo "Tm9u....bnQu" | fmt -s -w 76 | base64 -d to decode.  */
+#define BONORUM_ET_MALORUM \
+"Tm9uIGVyYW0gbsOpc2NpdXMsIEJydXRlLCBjdW0sIHF1w6Ygc3VtbWlzIGluZ8OpbmlpcyBleHF1aXNpdMOhcXVlIGRvY3Ryw61uYSBwaGlsw7Nzb3BoaSBHcsOmY28gc2VybcOzbmUgdHJhY3RhdsOtc3NlbnQsIGVhIExhdMOtbmlzIGzDrXR0ZXJpcyBtYW5kYXLDqW11cywgZm9yZSB1dCBoaWMgbm9zdGVyIGxhYm9yIGluIHbDoXJpYXMgcmVwcmVoZW5zacOzbmVzIGluY8O6cnJlcmV0LiBuYW0gcXVpYsO6c2RhbSwgZXQgaWlzIHF1aWRlbSBub24gw6FkbW9kdW0gaW5kw7NjdGlzLCB0b3R1bSBob2MgZMOtc3BsaWNldCBwaGlsb3NvcGjDoXJpLiBxdWlkYW0gYXV0ZW0gbm9uIHRhbSBpZCByZXByZWjDqW5kdW50LCBzaSByZW3DrXNzaXVzIGFnw6F0dXIsIHNlZCB0YW50dW0gc3TDumRpdW0gdGFtcXVlIG11bHRhbSDDs3BlcmFtIHBvbsOpbmRhbSBpbiBlbyBub24gYXJiaXRyw6FudHVyLiBlcnVudCDDqXRpYW0sIGV0IGlpIHF1aWRlbSBlcnVkw610aSBHcsOmY2lzIGzDrXR0ZXJpcywgY29udGVtbsOpbnRlcyBMYXTDrW5hcywgcXVpIHNlIGRpY2FudCBpbiBHcsOmY2lzIGxlZ8OpbmRpcyDDs3BlcmFtIG1hbGxlIGNvbnPDum1lcmUuIHBvc3Ryw6ltbyDDoWxpcXVvcyBmdXTDunJvcyBzw7pzcGljb3IsIHF1aSBtZSBhZCDDoWxpYXMgbMOtdHRlcmFzIHZvY2VudCwgZ2VudXMgaG9jIHNjcmliw6luZGksIGV0c2kgc2l0IGVsw6lnYW5zLCBwZXJzw7Nuw6YgdGFtZW4gZXQgZGlnbml0w6F0aXMgZXNzZSBuZWdlbnQu"
+#if __has_embed ("." gnu::base64 (BONORUM_ET_MALORUM)) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("foo" gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed (<foo> gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed (<.> gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::base64 ("SGU=") limit(5)) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::base64 ("SGU=") gnu::offset(2)) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#embed "." gnu::base64 ("") if_empty (int a = 42;) prefix(+ +) suffix (+ +)
+#embed "." __gnu__::__base64__ ("SA==") prefix (int b = ) suffix (;) if_empty (+ +)
+const unsigned char c[] = {
+  #embed "." gnu::base64("SGU=")
+};
+const unsigned char d[] = {
+  #embed "." gnu::base64 ("SGVs")
+};
+const unsigned char e[] = {
+  #embed "." gnu::base64 ("SGVsbG8=")
+};
+const unsigned char f[] = {
+#ifdef __cplusplus
+  #embed "." gnu::base64 (BONORUM_ET_MALORUM) prefix (' ', )
+#else
+  #embed "." gnu::base64 (BONORUM_ET_MALORUM) prefix ([1] = ) suffix(, [0] = ' ')
+#endif
+};
+#if __has_embed ("." gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg" \
+"c2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu" \
+"YSBhbGlxdWEuCg==")) == __STDC_EMBED_FOUND__
+const unsigned char g[] = {
+#embed "." gnu::base64("" \
+"T" "G9" "yZW" \
+"0gaX" \
+"BzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg" \
+"c2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu" \
+"YSBhbGlxdWEuCg==")
+};
+#endif
+
+#ifdef __cplusplus
+#define C "C"
+#else
+#define C
+#endif
+extern C void abort (void);
+extern C int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+int
+main ()
+{
+  if (a != 42 || b != 'H')
+    abort ();
+  if (sizeof (c) != 2 || c[0] != 'H' || c[1] != 'e')
+    abort ();
+  if (sizeof (d) != 3 || d[0] != 'H' || d[1] != 'e' || d[2] != 'l')
+    abort ();
+  if (sizeof (e) != 5 || memcmp (e, "Hello", 5))
+    abort ();
+  if (sizeof (f) != 1 + 747 || memcmp (f, " Non eram néscius, Brute",
+				       sizeof (" Non eram néscius, Brute") - 1))
+    abort ();
+  const char ge[]
+    = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.";
+  if (sizeof (g) != sizeof (ge)
+      || memcmp (g, ge, sizeof (ge) - 1)
+      || g[sizeof (ge) - 1] != '\n')
+    abort ();
+}
--- gcc/testsuite/c-c++-common/cpp/embed-18.c.jj	2024-08-15 11:35:50.563664827 +0200
+++ gcc/testsuite/c-c++-common/cpp/embed-18.c	2024-08-15 11:35:50.563664827 +0200
@@ -0,0 +1,33 @@ 
+/* { dg-do preprocess } */
+/* { dg-options "" } */
+
+#embed "." gnu::base64("") __gnu__::__base64__("") /* { dg-error "duplicate embed parameter 'gnu::base64'" } */
+#embed __FILE__ gnu::base64 prefix() suffix() /* { dg-error "expected '\\\('" } */
+#embed __FILE__ gnu::base64(1) prefix() suffix() /* { dg-error "expected character string literal" } */
+#embed __FILE__ gnu::base64() prefix() suffix() /* { dg-error "expected character string literal" } */
+#embed "." prefix() suffix() gnu::base64("" /* { dg-error "expected '\\\)'" } */
+#embed "." gnu::base64("a") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#embed "." gnu::base64("----") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#embed "." gnu::base64("a===") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#embed "." gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg\nc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu\nYSBhbGlxdWEuCg==") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#embed "embed-18.c" gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter can be only used with \\\".\\\"" } */
+#embed <embed-18.c> gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter can be only used with \\\".\\\"" } */
+#embed <.> gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter can be only used with \\\".\\\"" } */
+#embed "." gnu::base64("SA==") limit(3) /* { dg-error "'gnu::base64' parameter conflicts with 'limit' or 'gnu::offset' parameters" } */
+#embed "." gnu::base64("SA==") gnu::offset(1) /* { dg-error "'gnu::base64' parameter conflicts with 'limit' or 'gnu::offset' parameters" } */
+#if 1 + __has_embed ("." gnu::base64("") __gnu__::__base64__("")) /* { dg-error "duplicate embed parameter 'gnu::base64'" } */
+#endif
+#if 1 + __has_embed (__FILE__ __gnu__::__base64__ prefix() suffix()) /* { dg-error "expected '\\\('" } */
+#endif
+#if 1 + __has_embed (__FILE__ __gnu__::__base64__(1) prefix() suffix()) /* { dg-error "expected character string literal" } */
+#endif
+#if 1 + __has_embed (__FILE__ gnu::base64() prefix() suffix()) /* { dg-error "expected character string literal" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("a")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("----")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("a===")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg\nc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu\nYSBhbGlxdWEuCg==")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#endif
--- gcc/testsuite/c-c++-common/cpp/embed-19.c.jj	2024-08-15 11:35:50.563664827 +0200
+++ gcc/testsuite/c-c++-common/cpp/embed-19.c	2024-08-15 11:35:50.563664827 +0200
@@ -0,0 +1,5 @@ 
+/* { dg-do run } */
+/* { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir -save-temps -fdirectives-only" } */
+/* { dg-additional-options "-std=gnu99" { target c } } */
+
+#include "embed-1.c"
--- gcc/testsuite/gcc.dg/cpp/embed-6.c.jj	2024-08-15 11:35:50.563664827 +0200
+++ gcc/testsuite/gcc.dg/cpp/embed-6.c	2024-08-15 11:35:50.563664827 +0200
@@ -0,0 +1,6 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fpreprocessed" } */
+
+const unsigned char c[] = {
+#embed "embed-6.c" limit (64)	/* { dg-error "'gnu::base64' parameter required in preprocessed source" } */
+};
--- gcc/testsuite/gcc.dg/cpp/embed-7.c.jj	2024-08-15 11:35:50.563664827 +0200
+++ gcc/testsuite/gcc.dg/cpp/embed-7.c	2024-08-15 11:35:50.563664827 +0200
@@ -0,0 +1,6 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fpreprocessed -fdirectives-only" } */
+
+const unsigned char c[] = {
+#embed "embed-7.c" limit (64)	/* { dg-error "'gnu::base64' parameter required in preprocessed source" } */
+};