diff mbox series

[RFC,v2,2/2] c: Add __lengthof__() operator

Message ID 20240728164758.334853-3-alx@kernel.org
State New
Headers show
Series c: Add __lengthof__ operator | expand

Commit Message

Alejandro Colomar July 28, 2024, 4:48 p.m. UTC
This operator is similar to sizeof() but can only be applied to an
array, and returns its length (number of elements).

FUTURE DIRECTIONS:

	We could make it work with array parameters to functions, and
	somehow magically return the length designator of the array,
	regardless of it being really a pointer.

Link: <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2529.pdf>
Cc: Gabriel Ravier <gabravier@gmail.com>
Cc: Martin Uecker <uecker@tugraz.at>
Cc: Joseph Myers <josmyers@redhat.com>
Signed-off-by: Alejandro Colomar <alx@kernel.org>
---
 gcc/c-family/c-common.cc  | 19 ++++++++
 gcc/c-family/c-common.def |  3 ++
 gcc/c-family/c-common.h   |  2 +
 gcc/c/c-decl.cc           | 20 ++++++---
 gcc/c/c-parser.cc         | 61 +++++++++++++++++++------
 gcc/c/c-tree.h            |  4 ++
 gcc/c/c-typeck.cc         | 95 +++++++++++++++++++++++++++++++++++++--
 gcc/cp/operators.def      |  1 +
 gcc/target.h              |  3 ++
 9 files changed, 184 insertions(+), 24 deletions(-)
diff mbox series

Patch

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index e7e371fd26f..c0d6239c1f8 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -465,6 +465,7 @@  const struct c_common_resword c_common_reswords[] =
   { "__inline",		RID_INLINE,	0 },
   { "__inline__",	RID_INLINE,	0 },
   { "__label__",	RID_LABEL,	0 },
+  { "__lengthof__",	RID_LENGTHOF, 0 },
   { "__null",		RID_NULL,	0 },
   { "__real",		RID_REALPART,	0 },
   { "__real__",		RID_REALPART,	0 },
@@ -4070,6 +4071,24 @@  c_alignof_expr (location_t loc, tree expr)
 
   return fold_convert_loc (loc, size_type_node, t);
 }
+
+/* Implement the lengthof keyword: return the number of elements of the
+   array TYPE.  A non-array type or an incomplete array type (such as
+   int[]) is diagnosed at LOC and yields error_mark_node.  */
+
+tree
+c_lengthof_type (location_t loc, tree type)
+{
+  /* An array of unknown bound has no element count to report, so it
+     is rejected together with every non-array type.  */
+  if (TREE_CODE (type) != ARRAY_TYPE || !COMPLETE_TYPE_P (type))
+    {
+      error_at (loc, "invalid application of %<lengthof%> to type %qT", type);
+      return error_mark_node;
+    }
+
+  return array_type_nelts_top (type);
+}
 
 /* Handle C and C++ default attributes.  */
 
diff --git a/gcc/c-family/c-common.def b/gcc/c-family/c-common.def
index 5de96e5d4a8..6d162f67104 100644
--- a/gcc/c-family/c-common.def
+++ b/gcc/c-family/c-common.def
@@ -50,6 +50,9 @@  DEFTREECODE (EXCESS_PRECISION_EXPR, "excess_precision_expr", tcc_expression, 1)
    number.  */
 DEFTREECODE (USERDEF_LITERAL, "userdef_literal", tcc_exceptional, 3)
 
+/* Represents a 'lengthof' expression.  */
+DEFTREECODE (LENGTHOF_EXPR, "lengthof_expr", tcc_expression, 1)
+
 /* Represents a 'sizeof' expression during C++ template expansion,
    or for the purpose of -Wsizeof-pointer-memaccess warning.  */
 DEFTREECODE (SIZEOF_EXPR, "sizeof_expr", tcc_expression, 1)
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index ccaea27c2b9..f815a4cf3bc 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -105,6 +105,7 @@  enum rid
 
   /* C extensions */
   RID_ASM,       RID_TYPEOF,   RID_TYPEOF_UNQUAL, RID_ALIGNOF,  RID_ATTRIBUTE,
+  RID_LENGTHOF,
   RID_VA_ARG,
   RID_EXTENSION, RID_IMAGPART, RID_REALPART, RID_LABEL,    RID_CHOOSE_EXPR,
   RID_TYPES_COMPATIBLE_P,      RID_BUILTIN_COMPLEX,	   RID_BUILTIN_SHUFFLE,
@@ -885,6 +886,7 @@  extern tree c_common_truthvalue_conversion (location_t, tree);
 extern void c_apply_type_quals_to_decl (int, tree);
 extern tree c_sizeof_or_alignof_type (location_t, tree, bool, bool, int);
 extern tree c_alignof_expr (location_t, tree);
+extern tree c_lengthof_type (location_t, tree);
 /* Print an error message for invalid operands to arith operation CODE.
    NOP_EXPR is used as a special case (see truthvalue_conversion).  */
 extern void binary_op_error (rich_location *, enum tree_code, tree, tree);
diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 97f1d346835..1836151fc41 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -8937,12 +8937,16 @@  start_struct (location_t loc, enum tree_code code, tree name,
      within a statement expr used within sizeof, et. al.  This is not
      terribly serious as C++ doesn't permit statement exprs within
      sizeof anyhow.  */
-  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof))
+  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof || in_lengthof))
     warning_at (loc, OPT_Wc___compat,
 		"defining type in %qs expression is invalid in C++",
 		(in_sizeof
 		 ? "sizeof"
-		 : (in_typeof ? "typeof" : "alignof")));
+		 : (in_typeof
+		    ? "typeof"
+		    : (in_alignof
+		       ? "alignof"
+		       : "lengthof"))));
 
   if (in_underspecified_init)
     error_at (loc, "%qT defined in underspecified object initializer", ref);
@@ -9897,7 +9901,7 @@  finish_struct (location_t loc, tree t, tree fieldlist, tree attributes,
 	 struct_types.  */
       if (warn_cxx_compat
 	  && struct_parse_info != NULL
-	  && !in_sizeof && !in_typeof && !in_alignof)
+	  && !in_sizeof && !in_typeof && !in_alignof && !in_lengthof)
 	struct_parse_info->struct_types.safe_push (t);
      }
 
@@ -10071,12 +10075,16 @@  start_enum (location_t loc, struct c_enum_contents *the_enum, tree name,
   /* FIXME: This will issue a warning for a use of a type defined
      within sizeof in a statement expr.  This is not terribly serious
      as C++ doesn't permit statement exprs within sizeof anyhow.  */
-  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof))
+  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof || in_lengthof))
     warning_at (loc, OPT_Wc___compat,
 		"defining type in %qs expression is invalid in C++",
 		(in_sizeof
 		 ? "sizeof"
-		 : (in_typeof ? "typeof" : "alignof")));
+		 : (in_typeof
+		    ? "typeof"
+		    : (in_alignof
+		       ? "alignof"
+		       : "lengthof"))));
 
   if (in_underspecified_init)
     error_at (loc, "%qT defined in underspecified object initializer",
@@ -10270,7 +10278,7 @@  finish_enum (tree enumtype, tree values, tree attributes)
      struct_types.  */
   if (warn_cxx_compat
       && struct_parse_info != NULL
-      && !in_sizeof && !in_typeof && !in_alignof)
+      && !in_sizeof && !in_typeof && !in_alignof && !in_lengthof)
     struct_parse_info->struct_types.safe_push (enumtype);
 
   /* Check for consistency with previous definition */
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 12c5ed5d92c..09bb19f9299 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -74,7 +74,17 @@  along with GCC; see the file COPYING3.  If not see
 #include "bitmap.h"
 #include "analyzer/analyzer-language.h"
 #include "toplev.h"
+
+/* Parse a sizeof expression by forwarding PARSER to the shared
+   sizeof/lengthof parsing routine with RID_SIZEOF selected.  */
+#define c_parser_sizeof_expression(parser) \
+  (c_parser_sizeof_or_lengthof_expression ((parser), RID_SIZEOF))
 
+/* Parse a lengthof expression by forwarding PARSER to the shared
+   sizeof/lengthof parsing routine with RID_LENGTHOF selected.  */
+#define c_parser_lengthof_expression(parser) \
+  (c_parser_sizeof_or_lengthof_expression ((parser), RID_LENGTHOF))
+
 /* We need to walk over decls with incomplete struct/union/enum types
    after parsing the whole translation unit.
    In finish_decl(), if the decl is static, has incomplete
@@ -1687,7 +1697,7 @@  static struct c_expr c_parser_binary_expression (c_parser *, struct c_expr *,
 						 tree);
 static struct c_expr c_parser_cast_expression (c_parser *, struct c_expr *);
 static struct c_expr c_parser_unary_expression (c_parser *);
-static struct c_expr c_parser_sizeof_expression (c_parser *);
+static struct c_expr c_parser_sizeof_or_lengthof_expression (c_parser *, enum rid);
 static struct c_expr c_parser_alignof_expression (c_parser *);
 static struct c_expr c_parser_postfix_expression (c_parser *);
 static struct c_expr c_parser_postfix_expression_after_paren_type (c_parser *,
@@ -9864,6 +9874,8 @@  c_parser_unary_expression (c_parser *parser)
     case CPP_KEYWORD:
       switch (c_parser_peek_token (parser)->keyword)
 	{
+	case RID_LENGTHOF:
+	  return c_parser_lengthof_expression (parser);
 	case RID_SIZEOF:
 	  return c_parser_sizeof_expression (parser);
 	case RID_ALIGNOF:
@@ -9903,12 +9915,13 @@  c_parser_unary_expression (c_parser *parser)
 /* Parse a sizeof expression.  */
 
 static struct c_expr
-c_parser_sizeof_expression (c_parser *parser)
+c_parser_sizeof_or_lengthof_expression (c_parser *parser, enum rid rid)
 {
+  const char *op_name = (rid == RID_LENGTHOF) ? "lengthof" : "sizeof";
   struct c_expr expr;
   struct c_expr result;
   location_t expr_loc;
-  gcc_assert (c_parser_next_token_is_keyword (parser, RID_SIZEOF));
+  gcc_assert (c_parser_next_token_is_keyword (parser, rid));
 
   location_t start;
   location_t finish = UNKNOWN_LOCATION;
@@ -9917,7 +9930,10 @@  c_parser_sizeof_expression (c_parser *parser)
 
   c_parser_consume_token (parser);
   c_inhibit_evaluation_warnings++;
-  in_sizeof++;
+  if (rid == RID_LENGTHOF)
+    in_lengthof++;
+  else
+    in_sizeof++;
   if (c_parser_next_token_is (parser, CPP_OPEN_PAREN)
       && c_token_starts_compound_literal (c_parser_peek_2nd_token (parser)))
     {
@@ -9936,7 +9952,10 @@  c_parser_sizeof_expression (c_parser *parser)
 	{
 	  struct c_expr ret;
 	  c_inhibit_evaluation_warnings--;
-	  in_sizeof--;
+	  if (rid == RID_LENGTHOF)
+	    in_lengthof--;
+	  else
+	    in_sizeof--;
 	  ret.set_error ();
 	  ret.original_code = ERROR_MARK;
 	  ret.original_type = NULL;
@@ -9948,31 +9967,45 @@  c_parser_sizeof_expression (c_parser *parser)
 							       type_name,
 							       expr_loc);
 	  finish = expr.get_finish ();
-	  goto sizeof_expr;
+	  goto Xof_expr;
 	}
       /* sizeof ( type-name ).  */
       if (scspecs)
-	error_at (expr_loc, "storage class specifier in %<sizeof%>");
+	error_at (expr_loc, "storage class specifier in %qs", op_name);
       if (type_name->specs->alignas_p)
 	error_at (type_name->specs->locations[cdw_alignas],
-		  "alignment specified for type name in %<sizeof%>");
+		  "alignment specified for type name in %qs", op_name);
       c_inhibit_evaluation_warnings--;
-      in_sizeof--;
-      result = c_expr_sizeof_type (expr_loc, type_name);
+      if (rid == RID_LENGTHOF)
+	{
+	  in_lengthof--;
+	  result = c_expr_lengthof_type (expr_loc, type_name);
+	}
+      else
+	{
+	  in_sizeof--;
+	  result = c_expr_sizeof_type (expr_loc, type_name);
+	}
     }
   else
     {
       expr_loc = c_parser_peek_token (parser)->location;
       expr = c_parser_unary_expression (parser);
       finish = expr.get_finish ();
-    sizeof_expr:
+    Xof_expr:
       c_inhibit_evaluation_warnings--;
-      in_sizeof--;
+      if (rid == RID_LENGTHOF)
+	in_lengthof--;
+      else
+	in_sizeof--;
       mark_exp_read (expr.value);
       if (TREE_CODE (expr.value) == COMPONENT_REF
 	  && DECL_C_BIT_FIELD (TREE_OPERAND (expr.value, 1)))
-	error_at (expr_loc, "%<sizeof%> applied to a bit-field");
-      result = c_expr_sizeof_expr (expr_loc, expr);
+	error_at (expr_loc, "%qs applied to a bit-field", op_name);
+      if (rid == RID_LENGTHOF)
+	result = c_expr_lengthof_expr (expr_loc, expr);
+      else
+	result = c_expr_sizeof_expr (expr_loc, expr);
     }
   if (finish == UNKNOWN_LOCATION)
     finish = start;
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index 15da875a029..102fcfefea6 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -736,6 +736,7 @@  extern int c_type_dwarf_attribute (const_tree, int);
 /* in c-typeck.cc */
 extern int in_alignof;
 extern int in_sizeof;
+extern int in_lengthof;
 extern int in_typeof;
 extern bool c_in_omp_for;
 extern bool c_omp_array_section_p;
@@ -786,6 +787,9 @@  extern tree build_external_ref (location_t, tree, bool, tree *);
 extern void pop_maybe_used (bool);
 extern struct c_expr c_expr_sizeof_expr (location_t, struct c_expr);
 extern struct c_expr c_expr_sizeof_type (location_t, struct c_type_name *);
+extern struct c_expr c_expr_lengthof_expr (location_t, struct c_expr);
+extern struct c_expr c_expr_lengthof_type (location_t loc,
+                                           struct c_type_name *);
 extern struct c_expr parser_build_unary_op (location_t, enum tree_code,
     					    struct c_expr);
 extern struct c_expr parser_build_binary_op (location_t,
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 7e0f01ed22b..121e74de25d 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -71,6 +71,9 @@  int in_alignof;
 /* The level of nesting inside "sizeof".  */
 int in_sizeof;
 
+/* The level of nesting inside "lengthof".  */
+int in_lengthof;
+
 /* The level of nesting inside "typeof".  */
 int in_typeof;
 
@@ -3255,7 +3258,7 @@  build_external_ref (location_t loc, tree id, bool fun, tree *type)
 
   if (TREE_CODE (ref) == FUNCTION_DECL && !in_alignof)
     {
-      if (!in_sizeof && !in_typeof)
+      if (!in_sizeof && !in_typeof && !in_lengthof)
 	C_DECL_USED (ref) = 1;
       else if (DECL_INITIAL (ref) == NULL_TREE
 	       && DECL_EXTERNAL (ref)
@@ -3311,7 +3314,7 @@  struct maybe_used_decl
 {
   /* The decl.  */
   tree decl;
-  /* The level seen at (in_sizeof + in_typeof).  */
+  /* The level seen at (in_sizeof + in_typeof + in_lengthof).  */
   int level;
   /* The next one at this level or above, or NULL.  */
   struct maybe_used_decl *next;
@@ -3329,7 +3332,7 @@  record_maybe_used_decl (tree decl)
 {
   struct maybe_used_decl *t = XOBNEW (&parser_obstack, struct maybe_used_decl);
   t->decl = decl;
-  t->level = in_sizeof + in_typeof;
+  t->level = in_sizeof + in_typeof + in_lengthof;
   t->next = maybe_used_decls;
   maybe_used_decls = t;
 }
@@ -3343,7 +3346,7 @@  void
 pop_maybe_used (bool used)
 {
   struct maybe_used_decl *p = maybe_used_decls;
-  int cur_level = in_sizeof + in_typeof;
+  int cur_level = in_sizeof + in_typeof + in_lengthof;
   while (p && p->level > cur_level)
     {
       if (used)
@@ -3453,6 +3456,90 @@  c_expr_sizeof_type (location_t loc, struct c_type_name *t)
   return ret;
 }
 
+/* Return the result of lengthof applied to the expression EXPR.  LOC
+   is the location at which an invalid operand type is diagnosed.  */
+
+struct c_expr
+c_expr_lengthof_expr (location_t loc, struct c_expr expr)
+{
+  struct c_expr ret;
+  ret.original_type = NULL;
+  ret.m_decimal = 0;
+  /* An erroneous operand simply yields an erroneous result.  */
+  if (expr.value == error_mark_node)
+    {
+      ret.value = error_mark_node;
+      ret.original_code = ERROR_MARK;
+      pop_maybe_used (false);
+      return ret;
+    }
+
+  bool expr_const_operands = true;
+  tree folded_expr = c_fully_fold (expr.value, require_constant_value,
+				   &expr_const_operands);
+  tree operand_type = TREE_TYPE (folded_expr);
+  bool variable_size = C_TYPE_VARIABLE_SIZE (operand_type);
+  ret.value = c_lengthof_type (loc, operand_type);
+  ret.original_code = LENGTHOF_EXPR;
+  c_last_sizeof_arg = expr.value;
+  c_last_sizeof_loc = loc;
+  if (variable_size)
+    {
+      /* The operand of lengthof is evaluated when it is a VLA.  */
+      ret.value = build2 (C_MAYBE_CONST_EXPR, TREE_TYPE (ret.value),
+			  folded_expr, ret.value);
+      C_MAYBE_CONST_EXPR_NON_CONST (ret.value) = !expr_const_operands;
+      SET_EXPR_LOCATION (ret.value, loc);
+    }
+  pop_maybe_used (variable_size);
+  return ret;
+}
+
+/* Return the result of lengthof applied to T, the parsed type name
+   given as the operand of __lengthof__ (rather than the type itself).
+   LOC is the location of the original expression.  */
+
+struct c_expr
+c_expr_lengthof_type (location_t loc, struct c_type_name *t)
+{
+  struct c_expr ret;
+  tree type_expr = NULL_TREE;
+  bool type_expr_const = true;
+  tree type = groktypename (t, &type_expr, &type_expr_const);
+
+  ret.value = c_lengthof_type (loc, type);
+  ret.original_code = LENGTHOF_EXPR;
+  ret.original_type = NULL;
+  ret.m_decimal = 0;
+  c_last_sizeof_arg = type;
+  c_last_sizeof_loc = loc;
+  if (type == error_mark_node)
+    {
+      ret.value = error_mark_node;
+      ret.original_code = ERROR_MARK;
+      pop_maybe_used (false);
+      return ret;
+    }
+
+  bool variable_size = C_TYPE_VARIABLE_SIZE (type);
+  if (variable_size
+      && (type_expr || TREE_CODE (ret.value) == INTEGER_CST))
+    {
+      /* A [*] array is a VLA that is represented with a size of zero.
+	 Wrap the result so that c_fully_fold does not fold it to a
+	 constant: an evaluated length is not constant, so the zero or
+	 negative size constraints must not apply when this lengthof
+	 is nested inside another array declarator.  */
+      if (!type_expr)
+	type_expr = integer_zero_node;
+      ret.value = build2 (C_MAYBE_CONST_EXPR, TREE_TYPE (ret.value),
+			  type_expr, ret.value);
+      C_MAYBE_CONST_EXPR_NON_CONST (ret.value) = !type_expr_const;
+    }
+  pop_maybe_used (variable_size);
+  return ret;
+}
+
 /* Build a function call to function FUNCTION with parameters PARAMS.
    The function call is at LOC.
    PARAMS is a list--a chain of TREE_LIST nodes--in which the
diff --git a/gcc/cp/operators.def b/gcc/cp/operators.def
index d8878923602..d640ed8bd91 100644
--- a/gcc/cp/operators.def
+++ b/gcc/cp/operators.def
@@ -91,6 +91,7 @@  DEF_OPERATOR ("co_await", CO_AWAIT_EXPR, "aw", OVL_OP_FLAG_UNARY)
 
 /* These are extensions.  */
 DEF_OPERATOR ("alignof", ALIGNOF_EXPR, "az", OVL_OP_FLAG_UNARY)
+DEF_OPERATOR ("__lengthof__", LENGTHOF_EXPR, "lz", OVL_OP_FLAG_UNARY)
 DEF_OPERATOR ("__imag__", IMAGPART_EXPR, "v18__imag__", OVL_OP_FLAG_UNARY)
 DEF_OPERATOR ("__real__", REALPART_EXPR, "v18__real__", OVL_OP_FLAG_UNARY)
 
diff --git a/gcc/target.h b/gcc/target.h
index c1f99b97b86..79890ae9944 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -245,6 +245,9 @@  enum type_context_kind {
   /* Directly measuring the alignment of T.  */
   TCTX_ALIGNOF,
 
+  /* Directly measuring the length of array T.  */
+  TCTX_LENGTHOF,
+
   /* Creating objects of type T with static storage duration.  */
   TCTX_STATIC_STORAGE,