diff mbox series

[v5,3/3] c: Add __lengthof__ operator

Message ID 20240806231151.1046913-4-alx@kernel.org
State New
Headers show
Series c: Add __lengthof__ operator | expand

Commit Message

Alejandro Colomar Aug. 6, 2024, 11:12 p.m. UTC
This operator is similar to sizeof but can only be applied to an array,
and returns its length (number of elements).

FUTURE DIRECTIONS:

  We could make it work with array parameters to functions, and
  somehow magically return the length designator of the array,
  regardless of it being really a pointer.

Cc: Joseph Myers <josmyers@redhat.com>
Cc: Gabriel Ravier <gabravier@gmail.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Qing Zhao <qing.zhao@oracle.com>
Cc: Jens Gustedt <jens.gustedt@inria.fr>
Cc: David Brown <david.brown@hesbynett.no>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Andreas Schwab <schwab@linux-m68k.org>

gcc/ChangeLog:

	* doc/extend.texi: Document __lengthof__ operator.
	* target.h (enum type_context_kind): Add __lengthof__ operator.

gcc/c-family/ChangeLog:

	* c-common.h:
	* c-common.def:
	* c-common.cc (c_lengthof_type): Add __lengthof__ operator.

gcc/c/ChangeLog:

	* c-tree.h
	(c_expr_lengthof_expr, c_expr_lengthof_type):
	* c-decl.cc
	(start_struct, finish_struct):
	(start_enum, finish_enum):
	* c-parser.cc
	(c_parser_sizeof_expression):
	(c_parser_lengthof_expression):
	(c_parser_sizeof_or_lengthof_expression):
	(c_parser_unary_expression):
	* c-typeck.cc
	(build_external_ref):
	(record_maybe_used_decl, pop_maybe_used):
	(is_top_array_vla):
	(c_expr_lengthof_expr, c_expr_lengthof_type):
	Add __lengthof__ operator.

gcc/cp/ChangeLog:

	* operators.def: Add __lengthof__ operator.

gcc/testsuite/ChangeLog:

	* gcc.dg/lengthof-compile.c:
	* gcc.dg/lengthof.c: Add tests for __lengthof__ operator.

Link: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2529.pdf
Link: https://inbox.sourceware.org/gcc/M8S4oQy--3-2@tutanota.com/T/
Suggested-by: Xavier Del Campo Romero <xavi.dcr@tutanota.com>
Co-developed-by: Martin Uecker <uecker@tugraz.at>
Signed-off-by: Alejandro Colomar <alx@kernel.org>
---
 gcc/c-family/c-common.cc                |  26 +++++
 gcc/c-family/c-common.def               |   3 +
 gcc/c-family/c-common.h                 |   2 +
 gcc/c/c-decl.cc                         |  20 ++--
 gcc/c/c-parser.cc                       |  61 +++++++++---
 gcc/c/c-tree.h                          |   4 +
 gcc/c/c-typeck.cc                       | 114 ++++++++++++++++++++-
 gcc/cp/operators.def                    |   1 +
 gcc/doc/extend.texi                     |  23 +++++
 gcc/target.h                            |   3 +
 gcc/testsuite/gcc.dg/lengthof-compile.c |  49 +++++++++
 gcc/testsuite/gcc.dg/lengthof.c         | 127 ++++++++++++++++++++++++
 12 files changed, 409 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/lengthof-compile.c
 create mode 100644 gcc/testsuite/gcc.dg/lengthof.c

Comments

Alejandro Colomar Aug. 6, 2024, 11:14 p.m. UTC | #1
On Wed, Aug 07, 2024 at 01:12:17AM GMT, Alejandro Colomar wrote:
> This operator is similar to sizeof but can only be applied to an array,
> and returns its length (number of elements).
> 
> FUTURE DIRECTIONS:
> 
>   We could make it work with array parameters to functions, and
>   somehow magically return the length designator of the array,
>   regardless of it being really a pointer.
> 
> Cc: Joseph Myers <josmyers@redhat.com>
> Cc: Gabriel Ravier <gabravier@gmail.com>
> Cc: Jakub Jelinek <jakub@redhat.com>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Qing Zhao <qing.zhao@oracle.com>
> Cc: Jens Gustedt <jens.gustedt@inria.fr>
> Cc: David Brown <david.brown@hesbynett.no>
> Cc: Florian Weimer <fweimer@redhat.com>
> Cc: Andreas Schwab <schwab@linux-m68k.org>
> 
> gcc/ChangeLog:
> 
> 	* doc/extend.texi: Document __lengthof__ operator.
> 	* target.h (enum type_context_kind): Add __lengthof__ operator.
> 
> gcc/c-family/ChangeLog:
> 
> 	* c-common.h:
> 	* c-common.def:
> 	* c-common.cc (c_lengthof_type): Add __lengthof__ operator.
> 
> gcc/c/ChangeLog:
> 
> 	* c-tree.h
> 	(c_expr_lengthof_expr, c_expr_lengthof_type):
> 	* c-decl.cc
> 	(start_struct, finish_struct):
> 	(start_enum, finish_enum):
> 	* c-parser.cc
> 	(c_parser_sizeof_expression):
> 	(c_parser_lengthof_expression):
> 	(c_parser_sizeof_or_lengthof_expression):
> 	(c_parser_unary_expression):
> 	* c-typeck.cc
> 	(build_external_ref):
> 	(record_maybe_used_decl, pop_maybe_used):
> 	(is_top_array_vla):
> 	(c_expr_lengthof_expr, c_expr_lengthof_type):
> 	Add __lengthof__ operator.
> 
> gcc/cp/ChangeLog:
> 
> 	* operators.def: Add __lengthof__ operator.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/lengthof-compile.c:
> 	* gcc.dg/lengthof.c: Add tests for __lengthof__ operator.
> 
> Link: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2529.pdf
> Link: https://inbox.sourceware.org/gcc/M8S4oQy--3-2@tutanota.com/T/
> Suggested-by: Xavier Del Campo Romero <xavi.dcr@tutanota.com>
> Co-developed-by: Martin Uecker <uecker@tugraz.at>
> Signed-off-by: Alejandro Colomar <alx@kernel.org>
> ---
>  gcc/c-family/c-common.cc                |  26 +++++
>  gcc/c-family/c-common.def               |   3 +
>  gcc/c-family/c-common.h                 |   2 +
>  gcc/c/c-decl.cc                         |  20 ++--
>  gcc/c/c-parser.cc                       |  61 +++++++++---
>  gcc/c/c-tree.h                          |   4 +
>  gcc/c/c-typeck.cc                       | 114 ++++++++++++++++++++-
>  gcc/cp/operators.def                    |   1 +
>  gcc/doc/extend.texi                     |  23 +++++
>  gcc/target.h                            |   3 +
>  gcc/testsuite/gcc.dg/lengthof-compile.c |  49 +++++++++
>  gcc/testsuite/gcc.dg/lengthof.c         | 127 ++++++++++++++++++++++++
>  12 files changed, 409 insertions(+), 24 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/lengthof-compile.c
>  create mode 100644 gcc/testsuite/gcc.dg/lengthof.c
> 
> diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
> index e7e371fd26f..9f5feb83345 100644
> --- a/gcc/c-family/c-common.cc
> +++ b/gcc/c-family/c-common.cc
> @@ -465,6 +465,7 @@ const struct c_common_resword c_common_reswords[] =
>    { "__inline",		RID_INLINE,	0 },
>    { "__inline__",	RID_INLINE,	0 },
>    { "__label__",	RID_LABEL,	0 },
> +  { "__lengthof__",	RID_LENGTHOF, 0 },
>    { "__null",		RID_NULL,	0 },
>    { "__real",		RID_REALPART,	0 },
>    { "__real__",		RID_REALPART,	0 },
> @@ -4070,6 +4071,31 @@ c_alignof_expr (location_t loc, tree expr)
>  
>    return fold_convert_loc (loc, size_type_node, t);
>  }
> +
> +/* Implement the lengthof keyword: Return the length of an array,
> +   that is, the number of elements in the array.  */
> +
> +tree
> +c_lengthof_type (location_t loc, tree type)
> +{
> +  enum tree_code type_code;
> +
> +  type_code = TREE_CODE (type);
> +  if (type_code != ARRAY_TYPE)
> +    {
> +      error_at (loc, "invalid application of %<lengthof%> to type %qT", type);
> +      return error_mark_node;
> +    }
> +  if (!COMPLETE_TYPE_P (type))
> +    {
> +      error_at (loc,
> +		"invalid application of %<lengthof%> to incomplete type %qT",
> +		type);
> +      return error_mark_node;
> +    }
> +
> +  return array_type_nelts_top (type);
> +}
>  
>  /* Handle C and C++ default attributes.  */
>  
> diff --git a/gcc/c-family/c-common.def b/gcc/c-family/c-common.def
> index 5de96e5d4a8..6d162f67104 100644
> --- a/gcc/c-family/c-common.def
> +++ b/gcc/c-family/c-common.def
> @@ -50,6 +50,9 @@ DEFTREECODE (EXCESS_PRECISION_EXPR, "excess_precision_expr", tcc_expression, 1)
>     number.  */
>  DEFTREECODE (USERDEF_LITERAL, "userdef_literal", tcc_exceptional, 3)
>  
> +/* Represents a 'lengthof' expression.  */
> +DEFTREECODE (LENGTHOF_EXPR, "lengthof_expr", tcc_expression, 1)
> +
>  /* Represents a 'sizeof' expression during C++ template expansion,
>     or for the purpose of -Wsizeof-pointer-memaccess warning.  */
>  DEFTREECODE (SIZEOF_EXPR, "sizeof_expr", tcc_expression, 1)
> diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
> index ccaea27c2b9..f815a4cf3bc 100644
> --- a/gcc/c-family/c-common.h
> +++ b/gcc/c-family/c-common.h
> @@ -105,6 +105,7 @@ enum rid
>  
>    /* C extensions */
>    RID_ASM,       RID_TYPEOF,   RID_TYPEOF_UNQUAL, RID_ALIGNOF,  RID_ATTRIBUTE,
> +  RID_LENGTHOF,
>    RID_VA_ARG,
>    RID_EXTENSION, RID_IMAGPART, RID_REALPART, RID_LABEL,    RID_CHOOSE_EXPR,
>    RID_TYPES_COMPATIBLE_P,      RID_BUILTIN_COMPLEX,	   RID_BUILTIN_SHUFFLE,
> @@ -885,6 +886,7 @@ extern tree c_common_truthvalue_conversion (location_t, tree);
>  extern void c_apply_type_quals_to_decl (int, tree);
>  extern tree c_sizeof_or_alignof_type (location_t, tree, bool, bool, int);
>  extern tree c_alignof_expr (location_t, tree);
> +extern tree c_lengthof_type (location_t, tree);
>  /* Print an error message for invalid operands to arith operation CODE.
>     NOP_EXPR is used as a special case (see truthvalue_conversion).  */
>  extern void binary_op_error (rich_location *, enum tree_code, tree, tree);
> diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
> index 4dced430d1f..790c58b2558 100644
> --- a/gcc/c/c-decl.cc
> +++ b/gcc/c/c-decl.cc
> @@ -8937,12 +8937,16 @@ start_struct (location_t loc, enum tree_code code, tree name,
>       within a statement expr used within sizeof, et. al.  This is not
>       terribly serious as C++ doesn't permit statement exprs within
>       sizeof anyhow.  */
> -  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof))
> +  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof || in_lengthof))
>      warning_at (loc, OPT_Wc___compat,
>  		"defining type in %qs expression is invalid in C++",
>  		(in_sizeof
>  		 ? "sizeof"
> -		 : (in_typeof ? "typeof" : "alignof")));
> +		 : (in_typeof
> +		    ? "typeof"
> +		    : (in_alignof
> +		       ? "alignof"
> +		       : "lengthof"))));
>  
>    if (in_underspecified_init)
>      error_at (loc, "%qT defined in underspecified object initializer", ref);
> @@ -9897,7 +9901,7 @@ finish_struct (location_t loc, tree t, tree fieldlist, tree attributes,
>  	 struct_types.  */
>        if (warn_cxx_compat
>  	  && struct_parse_info != NULL
> -	  && !in_sizeof && !in_typeof && !in_alignof)
> +	  && !in_sizeof && !in_typeof && !in_alignof && !in_lengthof)
>  	struct_parse_info->struct_types.safe_push (t);
>       }
>  
> @@ -10071,12 +10075,16 @@ start_enum (location_t loc, struct c_enum_contents *the_enum, tree name,
>    /* FIXME: This will issue a warning for a use of a type defined
>       within sizeof in a statement expr.  This is not terribly serious
>       as C++ doesn't permit statement exprs within sizeof anyhow.  */
> -  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof))
> +  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof || in_lengthof))
>      warning_at (loc, OPT_Wc___compat,
>  		"defining type in %qs expression is invalid in C++",
>  		(in_sizeof
>  		 ? "sizeof"
> -		 : (in_typeof ? "typeof" : "alignof")));
> +		 : (in_typeof
> +		    ? "typeof"
> +		    : (in_alignof
> +		       ? "alignof"
> +		       : "lengthof"))));
>  
>    if (in_underspecified_init)
>      error_at (loc, "%qT defined in underspecified object initializer",
> @@ -10270,7 +10278,7 @@ finish_enum (tree enumtype, tree values, tree attributes)
>       struct_types.  */
>    if (warn_cxx_compat
>        && struct_parse_info != NULL
> -      && !in_sizeof && !in_typeof && !in_alignof)
> +      && !in_sizeof && !in_typeof && !in_alignof && !in_lengthof)
>      struct_parse_info->struct_types.safe_push (enumtype);
>  
>    /* Check for consistency with previous definition */
> diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
> index 12c5ed5d92c..09bb19f9299 100644
> --- a/gcc/c/c-parser.cc
> +++ b/gcc/c/c-parser.cc
> @@ -74,7 +74,17 @@ along with GCC; see the file COPYING3.  If not see
>  #include "bitmap.h"
>  #include "analyzer/analyzer-language.h"
>  #include "toplev.h"
> +
> +#define c_parser_sizeof_expression(parser)                                    \
> +(                                                                             \
> +  c_parser_sizeof_or_lengthof_expression (parser, RID_SIZEOF)                 \
> +)
>  
> +#define c_parser_lengthof_expression(parser)                                  \
> +(                                                                             \
> +  c_parser_sizeof_or_lengthof_expression (parser, RID_LENGTHOF)               \
> +)
> +
>  /* We need to walk over decls with incomplete struct/union/enum types
>     after parsing the whole translation unit.
>     In finish_decl(), if the decl is static, has incomplete
> @@ -1687,7 +1697,7 @@ static struct c_expr c_parser_binary_expression (c_parser *, struct c_expr *,
>  						 tree);
>  static struct c_expr c_parser_cast_expression (c_parser *, struct c_expr *);
>  static struct c_expr c_parser_unary_expression (c_parser *);
> -static struct c_expr c_parser_sizeof_expression (c_parser *);
> +static struct c_expr c_parser_sizeof_or_lengthof_expression (c_parser *, enum rid);
>  static struct c_expr c_parser_alignof_expression (c_parser *);
>  static struct c_expr c_parser_postfix_expression (c_parser *);
>  static struct c_expr c_parser_postfix_expression_after_paren_type (c_parser *,
> @@ -9864,6 +9874,8 @@ c_parser_unary_expression (c_parser *parser)
>      case CPP_KEYWORD:
>        switch (c_parser_peek_token (parser)->keyword)
>  	{
> +	case RID_LENGTHOF:
> +	  return c_parser_lengthof_expression (parser);
>  	case RID_SIZEOF:
>  	  return c_parser_sizeof_expression (parser);
>  	case RID_ALIGNOF:
> @@ -9903,12 +9915,13 @@ c_parser_unary_expression (c_parser *parser)
>  /* Parse a sizeof expression.  */
>  
>  static struct c_expr
> -c_parser_sizeof_expression (c_parser *parser)
> +c_parser_sizeof_or_lengthof_expression (c_parser *parser, enum rid rid)
>  {
> +  const char *op_name = (rid == RID_LENGTHOF) ? "lengthof" : "sizeof";
>    struct c_expr expr;
>    struct c_expr result;
>    location_t expr_loc;
> -  gcc_assert (c_parser_next_token_is_keyword (parser, RID_SIZEOF));
> +  gcc_assert (c_parser_next_token_is_keyword (parser, rid));
>  
>    location_t start;
>    location_t finish = UNKNOWN_LOCATION;
> @@ -9917,7 +9930,10 @@ c_parser_sizeof_expression (c_parser *parser)
>  
>    c_parser_consume_token (parser);
>    c_inhibit_evaluation_warnings++;
> -  in_sizeof++;
> +  if (rid == RID_LENGTHOF)
> +    in_lengthof++;
> +  else
> +    in_sizeof++;
>    if (c_parser_next_token_is (parser, CPP_OPEN_PAREN)
>        && c_token_starts_compound_literal (c_parser_peek_2nd_token (parser)))
>      {
> @@ -9936,7 +9952,10 @@ c_parser_sizeof_expression (c_parser *parser)
>  	{
>  	  struct c_expr ret;
>  	  c_inhibit_evaluation_warnings--;
> -	  in_sizeof--;
> +	  if (rid == RID_LENGTHOF)
> +	    in_lengthof--;
> +	  else
> +	    in_sizeof--;
>  	  ret.set_error ();
>  	  ret.original_code = ERROR_MARK;
>  	  ret.original_type = NULL;
> @@ -9948,31 +9967,45 @@ c_parser_sizeof_expression (c_parser *parser)
>  							       type_name,
>  							       expr_loc);
>  	  finish = expr.get_finish ();
> -	  goto sizeof_expr;
> +	  goto Xof_expr;
>  	}
>        /* sizeof ( type-name ).  */
>        if (scspecs)
> -	error_at (expr_loc, "storage class specifier in %<sizeof%>");
> +	error_at (expr_loc, "storage class specifier in %qs", op_name);
>        if (type_name->specs->alignas_p)
>  	error_at (type_name->specs->locations[cdw_alignas],
> -		  "alignment specified for type name in %<sizeof%>");
> +		  "alignment specified for type name in %qs", op_name);
>        c_inhibit_evaluation_warnings--;
> -      in_sizeof--;
> -      result = c_expr_sizeof_type (expr_loc, type_name);
> +      if (rid == RID_LENGTHOF)
> +	{
> +	  in_lengthof--;
> +	  result = c_expr_lengthof_type (expr_loc, type_name);
> +	}
> +      else
> +	{
> +	  in_sizeof--;
> +	  result = c_expr_sizeof_type (expr_loc, type_name);
> +	}
>      }
>    else
>      {
>        expr_loc = c_parser_peek_token (parser)->location;
>        expr = c_parser_unary_expression (parser);
>        finish = expr.get_finish ();
> -    sizeof_expr:
> +    Xof_expr:
>        c_inhibit_evaluation_warnings--;
> -      in_sizeof--;
> +      if (rid == RID_LENGTHOF)
> +	in_lengthof--;
> +      else
> +	in_sizeof--;
>        mark_exp_read (expr.value);
>        if (TREE_CODE (expr.value) == COMPONENT_REF
>  	  && DECL_C_BIT_FIELD (TREE_OPERAND (expr.value, 1)))
> -	error_at (expr_loc, "%<sizeof%> applied to a bit-field");
> -      result = c_expr_sizeof_expr (expr_loc, expr);
> +	error_at (expr_loc, "%qs applied to a bit-field", op_name);
> +      if (rid == RID_LENGTHOF)
> +	result = c_expr_lengthof_expr (expr_loc, expr);
> +      else
> +	result = c_expr_sizeof_expr (expr_loc, expr);
>      }
>    if (finish == UNKNOWN_LOCATION)
>      finish = start;
> diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
> index 15da875a029..102fcfefea6 100644
> --- a/gcc/c/c-tree.h
> +++ b/gcc/c/c-tree.h
> @@ -736,6 +736,7 @@ extern int c_type_dwarf_attribute (const_tree, int);
>  /* in c-typeck.cc */
>  extern int in_alignof;
>  extern int in_sizeof;
> +extern int in_lengthof;
>  extern int in_typeof;
>  extern bool c_in_omp_for;
>  extern bool c_omp_array_section_p;
> @@ -786,6 +787,9 @@ extern tree build_external_ref (location_t, tree, bool, tree *);
>  extern void pop_maybe_used (bool);
>  extern struct c_expr c_expr_sizeof_expr (location_t, struct c_expr);
>  extern struct c_expr c_expr_sizeof_type (location_t, struct c_type_name *);
> +extern struct c_expr c_expr_lengthof_expr (location_t, struct c_expr);
> +extern struct c_expr c_expr_lengthof_type (location_t loc,
> +                                           struct c_type_name *);
>  extern struct c_expr parser_build_unary_op (location_t, enum tree_code,
>      					    struct c_expr);
>  extern struct c_expr parser_build_binary_op (location_t,
> diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
> index 7e0f01ed22b..98e8d31cb3b 100644
> --- a/gcc/c/c-typeck.cc
> +++ b/gcc/c/c-typeck.cc
> @@ -71,6 +71,9 @@ int in_alignof;
>  /* The level of nesting inside "sizeof".  */
>  int in_sizeof;
>  
> +/* The level of nesting inside "lengthof".  */
> +int in_lengthof;
> +
>  /* The level of nesting inside "typeof".  */
>  int in_typeof;
>  
> @@ -3255,7 +3258,7 @@ build_external_ref (location_t loc, tree id, bool fun, tree *type)
>  
>    if (TREE_CODE (ref) == FUNCTION_DECL && !in_alignof)
>      {
> -      if (!in_sizeof && !in_typeof)
> +      if (!in_sizeof && !in_typeof && !in_lengthof)
>  	C_DECL_USED (ref) = 1;
>        else if (DECL_INITIAL (ref) == NULL_TREE
>  	       && DECL_EXTERNAL (ref)
> @@ -3311,7 +3314,7 @@ struct maybe_used_decl
>  {
>    /* The decl.  */
>    tree decl;
> -  /* The level seen at (in_sizeof + in_typeof).  */
> +  /* The level seen at (in_sizeof + in_typeof + in_lengthof).  */
>    int level;
>    /* The next one at this level or above, or NULL.  */
>    struct maybe_used_decl *next;
> @@ -3329,7 +3332,7 @@ record_maybe_used_decl (tree decl)
>  {
>    struct maybe_used_decl *t = XOBNEW (&parser_obstack, struct maybe_used_decl);
>    t->decl = decl;
> -  t->level = in_sizeof + in_typeof;
> +  t->level = in_sizeof + in_typeof + in_lengthof;
>    t->next = maybe_used_decls;
>    maybe_used_decls = t;
>  }
> @@ -3343,7 +3346,7 @@ void
>  pop_maybe_used (bool used)
>  {
>    struct maybe_used_decl *p = maybe_used_decls;
> -  int cur_level = in_sizeof + in_typeof;
> +  int cur_level = in_sizeof + in_typeof + in_lengthof;
>    while (p && p->level > cur_level)
>      {
>        if (used)
> @@ -3453,6 +3456,109 @@ c_expr_sizeof_type (location_t loc, struct c_type_name *t)
>    return ret;
>  }
>  
> +static bool
> +is_top_array_vla (tree type)
> +{
> +  bool zero, var;
> +  tree d;
> +
> +  if (TREE_CODE (type) != ARRAY_TYPE)
> +    return false;
> +  if (!COMPLETE_TYPE_P (type))
> +    return false;
> +
> +  d = TYPE_DOMAIN (type);
> +  zero = !TYPE_MAX_VALUE (d);
> +  var = (!zero
> +	 && (TREE_CODE (TYPE_MIN_VALUE (d)) != INTEGER_CST
> +	     || TREE_CODE (TYPE_MAX_VALUE (d)) != INTEGER_CST));
> +  var = var || (zero && C_TYPE_VARIABLE_SIZE (type));
> +  return var;
> +}
> +
> +/* Return the result of lengthof applied to EXPR.  */
> +
> +struct c_expr
> +c_expr_lengthof_expr (location_t loc, struct c_expr expr)
> +{
> +  struct c_expr ret;
> +  if (expr.value == error_mark_node)
> +    {
> +      ret.value = error_mark_node;
> +      ret.original_code = ERROR_MARK;
> +      ret.original_type = NULL;
> +      ret.m_decimal = 0;
> +      pop_maybe_used (false);
> +    }
> +  else
> +    {
> +      bool expr_const_operands = true;
> +
> +      tree folded_expr = c_fully_fold (expr.value, require_constant_value,
> +				       &expr_const_operands);
> +      ret.value = c_lengthof_type (loc, TREE_TYPE (folded_expr));
> +      c_last_sizeof_arg = expr.value;
> +      c_last_sizeof_loc = loc;

I have no idea what this part is for.  Please check what I
should do with it.

> +      ret.original_code = LENGTHOF_EXPR;
> +      ret.original_type = NULL;
> +      ret.m_decimal = 0;
> +      if (is_top_array_vla (TREE_TYPE (folded_expr)))
> +	{
> +	  /* lengthof is evaluated when given a vla.  */
> +	  ret.value = build2 (C_MAYBE_CONST_EXPR, TREE_TYPE (ret.value),
> +			      folded_expr, ret.value);
> +	  C_MAYBE_CONST_EXPR_NON_CONST (ret.value) = !expr_const_operands;
> +	  SET_EXPR_LOCATION (ret.value, loc);
> +	}
> +      pop_maybe_used (is_top_array_vla (TREE_TYPE (folded_expr)));
> +    }
> +  return ret;
> +}
> +
> +/* Return the result of lengthof applied to T, a structure for the type
> +   name passed to __lengthof__ (rather than the type itself).  LOC is the
> +   location of the original expression.  */
> +
> +struct c_expr
> +c_expr_lengthof_type (location_t loc, struct c_type_name *t)
> +{
> +  tree type;
> +  struct c_expr ret;
> +  tree type_expr = NULL_TREE;
> +  bool type_expr_const = true;
> +  type = groktypename (t, &type_expr, &type_expr_const);
> +  ret.value = c_lengthof_type (loc, type);
> +  c_last_sizeof_arg = type;
> +  c_last_sizeof_loc = loc;
> +  ret.original_code = LENGTHOF_EXPR;
> +  ret.original_type = NULL;
> +  ret.m_decimal = 0;
> +  if (type == error_mark_node)
> +    {
> +      ret.value = error_mark_node;
> +      ret.original_code = ERROR_MARK;
> +    }
> +  else
> +  if ((type_expr || TREE_CODE (ret.value) == INTEGER_CST)
> +      && is_top_array_vla (type))
> +    {
> +      /* If the type is a [*] array, it is a VLA but is represented as
> +	 having a size of zero.  In such a case we must ensure that
> +	 the result of lengthof does not get folded to a constant by
> +	 c_fully_fold, because if the length is evaluated the result is
> +	 not constant and so constraints on zero or negative size
> +	 arrays must not be applied when this lengthof call is inside
> +	 another array declarator.  */
> +      if (!type_expr)
> +	type_expr = integer_zero_node;
> +      ret.value = build2 (C_MAYBE_CONST_EXPR, TREE_TYPE (ret.value),
> +			  type_expr, ret.value);
> +      C_MAYBE_CONST_EXPR_NON_CONST (ret.value) = !type_expr_const;
> +    }
> +  pop_maybe_used (type != error_mark_node ? is_top_array_vla (type) : false);
> +  return ret;
> +}
> +
>  /* Build a function call to function FUNCTION with parameters PARAMS.
>     The function call is at LOC.
>     PARAMS is a list--a chain of TREE_LIST nodes--in which the
> diff --git a/gcc/cp/operators.def b/gcc/cp/operators.def
> index d8878923602..d640ed8bd91 100644
> --- a/gcc/cp/operators.def
> +++ b/gcc/cp/operators.def
> @@ -91,6 +91,7 @@ DEF_OPERATOR ("co_await", CO_AWAIT_EXPR, "aw", OVL_OP_FLAG_UNARY)
>  
>  /* These are extensions.  */
>  DEF_OPERATOR ("alignof", ALIGNOF_EXPR, "az", OVL_OP_FLAG_UNARY)
> +DEF_OPERATOR ("__lengthof__", LENGTHOF_EXPR, "lz", OVL_OP_FLAG_UNARY)
>  DEF_OPERATOR ("__imag__", IMAGPART_EXPR, "v18__imag__", OVL_OP_FLAG_UNARY)
>  DEF_OPERATOR ("__real__", REALPART_EXPR, "v18__real__", OVL_OP_FLAG_UNARY)
>  
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 0b572afca72..21608eb43a6 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -10391,6 +10391,29 @@ If the operand of the @code{__alignof__} expression is a function,
>  the expression evaluates to the alignment of the function which may
>  be specified by attribute @code{aligned} (@pxref{Common Function Attributes}).
>  
> +@node Length
> +@section Determining the Length of Arrays
> +@cindex lengthof
> +@cindex length
> +@cindex array length
> +
> +The keyword @code{__lengthof__} determines the length of an array operand,
> +that is, the number of elements in the array.
> +Its syntax is just like @code{sizeof}.
> +The operand must be a complete array type.
> +The operand is not evaluated
> +if the top-level length designator is an integer constant expression
> +(in this case, the operator results in a constant expression);
> +and it is evaluated
> +if the top-level length designator is not an integer constant expression
> +(in this case, the operator results in a run-time value).
> +For example:
> +
> +@smallexample
> +__lengthof__ (int [7][n++]);  // constexpr
> +__lengthof__ (int [n++][7]);  // run-time value
> +@end smallexample
> +
>  @node Inline
>  @section An Inline Function is As Fast As a Macro
>  @cindex inline functions
> diff --git a/gcc/target.h b/gcc/target.h
> index c1f99b97b86..79890ae9944 100644
> --- a/gcc/target.h
> +++ b/gcc/target.h
> @@ -245,6 +245,9 @@ enum type_context_kind {
>    /* Directly measuring the alignment of T.  */
>    TCTX_ALIGNOF,
>  
> +  /* Directly measuring the length of array T.  */
> +  TCTX_LENGTHOF,
> +
>    /* Creating objects of type T with static storage duration.  */
>    TCTX_STATIC_STORAGE,
>  
> diff --git a/gcc/testsuite/gcc.dg/lengthof-compile.c b/gcc/testsuite/gcc.dg/lengthof-compile.c
> new file mode 100644
> index 00000000000..6b44704ca7e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lengthof-compile.c
> @@ -0,0 +1,49 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Wno-declaration-after-statement -Wno-pedantic -Wno-vla" } */
> +
> +extern int x[];
> +
> +void
> +incomplete (int p[])
> +{
> +  unsigned n;
> +
> +  n = __lengthof__ (x);  /* { dg-error "incomplete" } */
> +
> +  /* We want to support the following one in the future,
> +     but for now it should fail.  */
> +  n = __lengthof__ (p);  /* { dg-error "invalid" } */
> +}
> +
> +void
> +fam (void)
> +{
> +  struct {
> +    int x;
> +    int fam[];
> +  } s;
> +  unsigned n;
> +
> +  n = __lengthof__ (s.fam); /* { dg-error "incomplete" } */
> +}
> +
> +void fix_fix (int i, char (*a)[3][5], int (*x)[__lengthof__ (*a)]);
> +void fix_var (int i, char (*a)[3][i], int (*x)[__lengthof__ (*a)]);
> +void fix_uns (int i, char (*a)[3][*], int (*x)[__lengthof__ (*a)]);
> +
> +void
> +func (void)
> +{
> +  int  i3[3];
> +  int  i5[5];
> +  char c35[3][5];
> +
> +  fix_fix (5, &c35, &i3);
> +  fix_fix (5, &c35, &i5); /* { dg-error "incompatible-pointer-types" } */
> +
> +  fix_var (5, &c35, &i3);
> +  fix_var (5, &c35, &i5); /* { dg-error "incompatible-pointer-types" } */
> +
> +  fix_uns (5, &c35, &i3);
> +  fix_uns (5, &c35, &i5); /* { dg-error "incompatible-pointer-types" } */
> +}
> diff --git a/gcc/testsuite/gcc.dg/lengthof.c b/gcc/testsuite/gcc.dg/lengthof.c
> new file mode 100644
> index 00000000000..38da5df52a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lengthof.c
> @@ -0,0 +1,127 @@
> +/* { dg-do run } */
> +/* { dg-options "-Wno-declaration-after-statement -Wno-pedantic -Wno-vla" } */
> +
> +#undef NDEBUG
> +#include <assert.h>
> +
> +void
> +array (void)
> +{
> +  short a[7];
> +
> +  assert (__lengthof__ (a) == 7);
> +  assert (__lengthof__ (long [0]) == 0);
> +  assert (__lengthof__ (unsigned [99]) == 99);
> +}
> +
> +void
> +vla (void)
> +{
> +  unsigned n;
> +
> +  n = 99;
> +  assert (__lengthof__ (short [n - 10]) == 99 - 10);
> +
> +  int v[n / 2];
> +  assert (__lengthof__ (v) == 99 / 2);
> +
> +  n = 0;
> +  int z[n];
> +  assert (__lengthof__ (z) == 0);
> +}
> +
> +void
> +member (void)
> +{
> +  struct {
> +    int a[8];
> +  } s;
> +
> +  assert (__lengthof__ (s.a) == 8);
> +}
> +
> +void
> +vla_eval (void)
> +{
> +  int i;
> +
> +  i = 7;
> +  assert (__lengthof__ (struct {int x;}[i++]) == 7);
> +  assert (i == 7 + 1);
> +
> +  int v[i];
> +  int (*p)[i];
> +  p = &v;
> +  assert (__lengthof__ (*p++) == i);
> +  assert (p - 1 == &v);
> +}
> +
> +void
> +inner_vla_noeval (void)
> +{
> +  int i;
> +
> +  i = 3;
> +  assert (__lengthof__ (struct {int x[i++];}[3]) == 3);
> +  assert (i == 3);
> +}
> +
> +void
> +array_noeval (void)
> +{
> +  long a[5];
> +  long (*p)[__lengthof__ (a)];
> +
> +  p = &a;
> +  assert (__lengthof__ (*p++) == 5);
> +  assert (p == &a);
> +}
> +
> +void
> +matrix_zero (void)
> +{
> +  int i;
> +
> +  assert (__lengthof__ (int [0][4]) == 0);
> +  i = 3;
> +  assert (__lengthof__ (int [0][i]) == 0);
> +}
> +
> +void
> +matrix_fixed (void)
> +{
> +  int i;
> +
> +  assert (__lengthof__ (int [7][4]) == 7);
> +  i = 3;
> +  assert (__lengthof__ (int [7][i]) == 7);
> +}
> +
> +void
> +matrix_vla (void)
> +{
> +  int i, j;
> +
> +  i = 7;
> +  assert (__lengthof__ (int [i++][4]) == 7);
> +  assert (i == 7 + 1);
> +
> +  i = 9;
> +  j = 3;
> +  assert (__lengthof__ (int [i++][j]) == 9);
> +  assert (i == 9 + 1);
> +}
> +
> +int
> +main (void)
> +{
> +  array ();
> +  vla ();
> +  member ();
> +  vla_eval ();
> +  inner_vla_noeval ();
> +  array_noeval ();
> +  matrix_zero ();
> +  matrix_fixed ();
> +  matrix_vla ();
> +}
> -- 
> 2.45.2
>
Martin Uecker Aug. 7, 2024, 7:13 a.m. UTC | #2
Am Mittwoch, dem 07.08.2024 um 01:12 +0200 schrieb Alejandro Colomar:


Hi Alex,

a couple of comments below.

> --- a/gcc/c/c-parser.cc
> +++ b/gcc/c/c-parser.cc
> @@ -74,7 +74,17 @@ along with GCC; see the file COPYING3.  If not see
>  #include "bitmap.h"
>  #include "analyzer/analyzer-language.h"
>  #include "toplev.h"
> +
> +#define c_parser_sizeof_expression(parser)                                    \
> +(                                                                             \
> +  c_parser_sizeof_or_lengthof_expression (parser, RID_SIZEOF)                 \
> +)
>  
> +#define c_parser_lengthof_expression(parser)                                  \
> +(                                                                             \
> +  c_parser_sizeof_or_lengthof_expression (parser, RID_LENGTHOF)               \
> +)
> +

I suggest avoiding the macros.  I think the original function calls are
clear enough, and the macros are just another detour for somebody trying
to follow the code.  Or is there a reason I am missing?

...

> diff --git a/gcc/testsuite/gcc.dg/lengthof-compile.c b/gcc/testsuite/gcc.dg/lengthof-compile.c
> new file mode 100644
> index 00000000000..6b44704ca7e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lengthof-compile.c
> @@ -0,0 +1,49 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Wno-declaration-after-statement -Wno-pedantic -Wno-vla" } */
> +
> +extern int x[];
> +
> +void
> +incomplete (int p[])
> +{
> +  unsigned n;
> +
> +  n = __lengthof__ (x);  /* { dg-error "incomplete" } */
> +
> +  /* We want to support the following one in the future,
> +     but for now it should fail.  */
> +  n = __lengthof__ (p);  /* { dg-error "invalid" } */
> +}
> +
> +void
> +fam (void)
> +{
> +  struct {
> +    int x;
> +    int fam[];
> +  } s;
> +  unsigned n;
> +
> +  n = __lengthof__ (s.fam); /* { dg-error "incomplete" } */
> +}
> +
> +void fix_fix (int i, char (*a)[3][5], int (*x)[__lengthof__ (*a)]);
> +void fix_var (int i, char (*a)[3][i], int (*x)[__lengthof__ (*a)]);
> +void fix_uns (int i, char (*a)[3][*], int (*x)[__lengthof__ (*a)]);


I would include a test that shows that when lengthof
is applied to [*], it remains formally non-constant.  For example,
you could test with -Wvla-parameter that the two declarations do not give a
warning:

void foo(char (*a)[*], int x[*]);
void foo(char (*a)[*], int x[__lengthof__(*a)]);


(With  int (*x)[*]  we would run into the issue that we can not
distinguish zero arrays from unspecified ones, PR 98539)


> +
> +void
> +func (void)
> +{
> +  int  i3[3];
> +  int  i5[5];
> +  char c35[3][5];
> +
> +  fix_fix (5, &c35, &i3);
> +  fix_fix (5, &c35, &i5); /* { dg-error "incompatible-pointer-types" } */
> +
> +  fix_var (5, &c35, &i3);
> +  fix_var (5, &c35, &i5); /* { dg-error "incompatible-pointer-types" } */
> +
> +  fix_uns (5, &c35, &i3);
> +  fix_uns (5, &c35, &i5); /* { dg-error "incompatible-pointer-types" } */
> +}
> diff --git a/gcc/testsuite/gcc.dg/lengthof.c b/gcc/testsuite/gcc.dg/lengthof.c
> new file mode 100644
> index 00000000000..38da5df52a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lengthof.c
> @@ -0,0 +1,127 @@
> +/* { dg-do run } */
> +/* { dg-options "-Wno-declaration-after-statement -Wno-pedantic -Wno-vla" } */
> +
> +#undef NDEBUG
> +#include <assert.h>
> +
> +void
> +array (void)
> +{
> +  short a[7];
> +
> +  assert (__lengthof__ (a) == 7);
> +  assert (__lengthof__ (long [0]) == 0);
> +  assert (__lengthof__ (unsigned [99]) == 99);
> +}

Instead of using assert you can use

if (! ...) __builtin_abort();

to avoid the include in the testsuite.

Otherwise it looks fine from my side.

Joseph needs to approve and may have more comments.

Martin
Alejandro Colomar Aug. 7, 2024, 9:14 a.m. UTC | #3
Hi Martin,

On Wed, Aug 07, 2024 at 09:13:07AM GMT, Martin Uecker wrote:
> Am Mittwoch, dem 07.08.2024 um 01:12 +0200 schrieb Alejandro Colomar:
> > +#define c_parser_lengthof_expression(parser)                                  \
> > +(                                                                             \
> > +  c_parser_sizeof_or_lengthof_expression (parser, RID_LENGTHOF)               \
> > +)
> > +
> 
> I suggest to avoid the macros.  I think the original function calls are
> clear enough and this is then just another detour for somebody trying
> to follow the code.  Or is there a reason I am missing?

I imitated the following ones that already exist:

	c-family/c-common.h:923:
	#define c_sizeof(LOC, T)  c_sizeof_or_alignof_type (LOC, T, true, false, 1)

	cp/cp-tree.h:8318:
	#define cxx_sizeof(T)  cxx_sizeof_or_alignof_type (input_location, T, SIZEOF_EXPR, false, true)

	c-family/c-common.h:924:
	#define c_alignof(LOC, T) c_sizeof_or_alignof_type (LOC, T, false, false, 1)

But I'm fine using it raw.

> > +void fix_fix (int i, char (*a)[3][5], int (*x)[__lengthof__ (*a)]);
> > +void fix_var (int i, char (*a)[3][i], int (*x)[__lengthof__ (*a)]);
> > +void fix_uns (int i, char (*a)[3][*], int (*x)[__lengthof__ (*a)]);
> 
> 
> It would include a test that shows that when lengthof
> is applied to [*] that it remains formally non-constant.  For example,
> you could test with -Wvla-parameter that the two declarations do not give a
> warning:
> 
> void foo(char (*a)[*], int x[*]);
> void foo(char (*a)[*], int x[__lengthof__(*a)]);

But [*] is a VLA.  Do we want to return a constexpr for it?

> (With  int (*x)[*]  we would run into the issue that we can not
> distinguish zero arrays from unspecified ones, PR 98539)

As Martin Sebor said, I need to choose between supporting well [0] or
supporting well [*], but not both.

I would personally prefer supporting [0], and consider that not
supporting [*] is a bug in the implementation of [*] (and thus not my
problem).

However, since GCC doesn't support 0-length arrays, I'm not sure that
would be correct.

What do you think?

Does anyone oppose treating [0] as a constexpr 0 length?  That means not
supporting well [*], but please fix it separately, which Martin Uecker
is working on.  :)

> > diff --git a/gcc/testsuite/gcc.dg/lengthof.c b/gcc/testsuite/gcc.dg/lengthof.c
> > new file mode 100644
> > index 00000000000..38da5df52a5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/lengthof.c
> > @@ -0,0 +1,127 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-Wno-declaration-after-statement -Wno-pedantic -Wno-vla" } */
> > +
> > +#undef NDEBUG
> > +#include <assert.h>
> > +
> > +void
> > +array (void)
> > +{
> > +  short a[7];
> > +
> > +  assert (__lengthof__ (a) == 7);
> > +  assert (__lengthof__ (long [0]) == 0);
> > +  assert (__lengthof__ (unsigned [99]) == 99);
> > +}
> 
> Instead of using assert you can use
> 
> if (! ...) __builtin_abort();
> 
> to avoid the include in the testsuite.

Is it frowned upon to include something?  I prefer assert(3).

> Otherwise it looks fine from my side.
> 
> Joseph needs to approve and may have more comments.

Thanks!

> 
> Martin

Have a lovely day!
Alex
Martin Uecker Aug. 7, 2024, 10:07 a.m. UTC | #4
Am Mittwoch, dem 07.08.2024 um 11:14 +0200 schrieb Alejandro Colomar:
> Hi Martin,
> 

> > > +void fix_fix (int i, char (*a)[3][5], int (*x)[__lengthof__ (*a)]);
> > > +void fix_var (int i, char (*a)[3][i], int (*x)[__lengthof__ (*a)]);
> > > +void fix_uns (int i, char (*a)[3][*], int (*x)[__lengthof__ (*a)]);
> > 
> > 
> > It would include a test that shows that when lengthof
> > is applied to [*] that it remains formally non-constant.  For example,
> > you could test with -Wvla-parameter that the two declarations do not give a
> > warning:
> > 
> > void foo(char (*a)[*], int x[*]);
> > void foo(char (*a)[*], int x[__lengthof__(*a)]);
> 
> But [*] is a VLA.  Do we want to return a constexpr for it?

No,  my point is only that we could have a test for not
returning a constant.

If __lengthof__ would incorrectly return an integer constant
expression then you would get a warning with -Wvla-parameter.  So
adding these two declarations to the tests and activating
the warning would ensure that the int[__lengthof__(*a)]
is a VLA:  https://godbolt.org/z/7P7qW15ah

> 
> > (With  int (*x)[*]  we would run into the issue that we can not
> > distinguish zero arrays from unspecified ones, PR 98539)
> 
> As Martin Sebor said, I need to choose between supporting well [0] or
> supporting well [*], but not both.

If you have only one array index this works. (and should
already work correctly with your patch)

> 
> I would personally prefer supporting [0], and consider that not
> supporting [*] is a bug in the implementation of [*] (and thus not my
> problem).
> 
> However, since GCC doesn't support 0-length arrays, I'm not sure that
> would be correct.
> 
> What do you think?

I think the logic in your patch is OK as is.  It does not do exactly
what you want, as it now treats some [0] as [*], but I would not
make the logic more complex here when we will fix it properly
anyway.

> 
> Does anyone oppose treating [0] as a constexpr 0 length?  That means not
> supporting well [*], but please fix it separately, which Martin Uecker
> is working on.  :)
> 
> > > diff --git a/gcc/testsuite/gcc.dg/lengthof.c b/gcc/testsuite/gcc.dg/lengthof.c
> > > new file mode 100644
> > > index 00000000000..38da5df52a5
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/lengthof.c
> > > @@ -0,0 +1,127 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-Wno-declaration-after-statement -Wno-pedantic -Wno-vla" } */
> > > +
> > > +#undef NDEBUG
> > > +#include <assert.h>
> > > +
> > > +void
> > > +array (void)
> > > +{
> > > +  short a[7];
> > > +
> > > +  assert (__lengthof__ (a) == 7);
> > > +  assert (__lengthof__ (long [0]) == 0);
> > > +  assert (__lengthof__ (unsigned [99]) == 99);
> > > +}
> > 
> > Instead of using assert you can use
> > 
> > if (! ...) __builtin_abort();
> > 
> > to avoid the include in the testsuite.
> 
> Is it frowned upon to include something?  I prefer assert(3).

It makes the tests run faster.  At least people told me before
to avoid includes in tests for this reason.   But from my side
assert is ok too.

Martin


> 
> > Otherwise it looks fine from my side.
> > 
> > Joseph needs to approve and may have more comments.
> 
> Thanks!
> 
> > 
> > Martin
> 
> Have a lovely day!
> Alex
>
Joseph Myers Aug. 7, 2024, 3:05 p.m. UTC | #5
On Wed, 7 Aug 2024, Alejandro Colomar wrote:

> +@node Length
> +@section Determining the Length of Arrays
> +@cindex lengthof
> +@cindex length
> +@cindex array length
> +
> +The keyword @code{__lengthof__} determines the length of an array operand,
> +that is, the number of elements in the array.
> +Its syntax is just like @code{sizeof}.
> +The operand must be a complete array type.

I think you mean the operand must be *an expression whose type is a 
complete array type* or *a type name for a complete array type*.  The 
wording you have suggests only type names, you need to be clear about both 
kinds of operands being possible (and include examples for them).

> +@smallexample
> +__lengthof__ (int [7][n++]);  // constexpr
> +__lengthof__ (int [n++][7]);  // run-time value
> +@end smallexample

I don't think using "constexpr" to mean "constant expression" is a good 
idea, they're different things.

> +void
> +incomplete (int p[])
> +{
> +  unsigned n;
> +
> +  n = __lengthof__ (x);  /* { dg-error "incomplete" } */
> +
> +  /* We want to support the following one in the future,
> +     but for now it should fail.  */
> +  n = __lengthof__ (p);  /* { dg-error "invalid" } */

This seems to be the only test you have for a non-array operand.  I'd 
expect such tests (both for type name operands and for expression 
operands) covering cases that we *don't* want to support in future, not 
just this one that we would like to be supportable in future.

I don't see any tests for the constraints on external definitions from 
6.9.1 that we discussed - that references to undefined internal linkage
identifiers are OK inside __lengthof__ returning a constant (both 
constant-length arrays of non-VLA and constant-length arrays of VLA) but 
not in the cases where __lengthof__ is evaluated.
Jens Gustedt Aug. 7, 2024, 3:30 p.m. UTC | #6
Hi

Am 7. August 2024 17:05:48 MESZ schrieb Joseph Myers <josmyers@redhat.com>:
> On Wed, 7 Aug 2024, Alejandro Colomar wrote:
> 
> > +@node Length
> > +@section Determining the Length of Arrays
> > +@cindex lengthof
> > +@cindex length
> > +@cindex array length
> > +
> > +The keyword @code{__lengthof__} determines the length of an array operand,
> > +that is, the number of elements in the array.
> > +Its syntax is just like @code{sizeof}.
> > +The operand must be a complete array type.
> 
> I think you mean the operand must be *an expression whose type is a 
> complete array type* or *a type name for a complete array type*.  The 
> wording you have suggests only type names, you need to be clear about both 
> kinds of operands being possible (and include examples for them).
> 
> > +@smallexample
> > +__lengthof__ (int [7][n++]);  // constexpr
> > +__lengthof__ (int [n++][7]);  // run-time value
> > +@end smallexample
> 
> I don't think using "constexpr" to mean "constant expression" is a good 
> idea, they're different things.

It should actually state "integer constant expression", I think.  The nuance is probably important.


> > +void
> > +incomplete (int p[])
> > +{
> > +  unsigned n;
> > +
> > +  n = __lengthof__ (x);  /* { dg-error "incomplete" } */
> > +
> > +  /* We want to support the following one in the future,
> > +     but for now it should fail.  */
> > +  n = __lengthof__ (p);  /* { dg-error "invalid" } */
> 
> This seems to be the only test you have for a non-array operand.  I'd 
> expect such tests (both for type name operands and for expression 
> operands) covering cases that we *don't* want to support in future, not 
> just this one that we would like to be supportable in future.
> 
> I don't see any tests for the constraints on external definitions from 
> 6.9.1 that we discussed - that referenced to undefined internal linkage 
> identifiers are OK inside __lengthof__ returning a constant (both 
> constant-length arrays of non-VLA and constant-length arrays of VLA) but 
> not in the cases where __lengthof__ is evaluated.
>
Alejandro Colomar Aug. 7, 2024, 10:09 p.m. UTC | #7
Hi Martin,

On Wed, Aug 07, 2024 at 12:07:00PM GMT, Martin Uecker wrote:
> > > void foo(char (*a)[*], int x[*]);
> > > void foo(char (*a)[*], int x[__lengthof__(*a)]);
> > 
> > But [*] is a VLA.  Do we want to return a constexpr for it?
> 
> No,  my point is only that we could have a test for not
> returning a constant.

Ok.

> 
> If __lengthof__ would incorrectly return an integer constant
> expression then you would get a warning with -Wvla-parameter.  So
> adding these two declarations to the tests and activating
> the warning would ensure that the int[__lengthof__(*a)]
> is a VLA:  https://godbolt.org/z/7P7qW15ah
> 
> > 
> > > (With  int (*x)[*]  we would run into the issue that we can not
> > > distinguish zero arrays from unspecified ones, PR 98539)
> > 
> > As Martin Sebor said, I need to choose between supporting well [0] or
> > supporting well [*], but not both.
> 
> If you have only one array index this works. (and should
> already work correctly with your patch)

I've been thinking today that I'll add full support for [0], and leave [*]
broken.

I'll also add tests for it.

> 
> > 
> > I would personally prefer supporting [0], and consider that not
> > supporting [*] is a bug in the implementation of [*] (and thus not my
> > problem).
> > 
> > However, since GCC doesn't support 0-length arrays, I'm not sure that
> > would be correct.
> > 
> > What do you think?
> 
> I think the logic in your patch is OK as is.  It does not exactly
> what you want, as it now treats some [0] as [*] but I would not
> make the logic more complex here when we will fix it properly
> anyway.

I'm detecting some issues with my patches.

	$ cat zero.c
	static int A[__lengthof__(int [0])];
	static int B[__lengthof__(A)];

	static int C[0];
	static int D[__lengthof__(C)];

	void fa(char (*a)[3][*], int (*x)[__lengthof__(*a)]);  // x: array
	void fb(char (*a)[*][3], int (*x)[__lengthof__(*a)]);  // x: vla
	void fc(char (*a)[3], int (*x)[__lengthof__(*a)]);  // x: array
	void fd(char (*a)[0], int (*x)[__lengthof__(*a)]);  // x: ?
	void fe(char (*a)[*], int (*x)[__lengthof__(*a)]);  // x: vla
	void ff(char (*a)[*], int (*x)[*]);  // x: array


	static int W[1];
	static int X[__lengthof__(W)];
	static int Y[0];
	static int Z[__lengthof__(Y)];

	$ /opt/local/gnu/gcc/lengthof/bin/gcc zero.c
	zero.c:18:12: error: variably modified ‘Z’ at file scope
	   18 | static int Z[__lengthof__(Y)];
	      |            ^


See that D, which is identical to Z, does not cause an error.
There's one case of [0] resulting in a constant expression, and another
in a VLA.  Can you please help investigate why it's happening?

I've added the following change on top of v5 to see some debugging info:

	diff --git i/gcc/c/c-typeck.cc w/gcc/c/c-typeck.cc
	index 98e8d31cb3b..9e05ee01a4a 100644
	--- i/gcc/c/c-typeck.cc
	+++ w/gcc/c/c-typeck.cc
	@@ -3462,6 +3462,8 @@ is_top_array_vla (tree type)
	   bool zero, var;
	   tree d;
	 
	+fprintf(stderr, "ALX: %s(): %d\n", __func__, __LINE__);
	+
	   if (TREE_CODE (type) != ARRAY_TYPE)
	     return false;
	   if (!COMPLETE_TYPE_P (type))
	@@ -3469,10 +3471,14 @@ is_top_array_vla (tree type)
	 
	   d = TYPE_DOMAIN (type);
	   zero = !TYPE_MAX_VALUE (d);
	+fprintf(stderr, "ALX: zero: %d\n", !!zero);
	   var = (!zero
		 && (TREE_CODE (TYPE_MIN_VALUE (d)) != INTEGER_CST
		     || TREE_CODE (TYPE_MAX_VALUE (d)) != INTEGER_CST));
	+fprintf(stderr, "ALX: var:    %d\n", !!var);
	   var = var || (zero && C_TYPE_VARIABLE_SIZE (type));
	+fprintf(stderr, "ALX: var:    %d\n", !!var);
	   return var;
	 }
	 
	@@ -3481,6 +3487,7 @@ is_top_array_vla (tree type)
	 struct c_expr
	 c_expr_lengthof_expr (location_t loc, struct c_expr expr)
	 {
	+fprintf(stderr, "ALX: %s(): %d\n", __func__, __LINE__);
	   struct c_expr ret;
	   if (expr.value == error_mark_node)
	     {
	@@ -3522,6 +3529,7 @@ c_expr_lengthof_expr (location_t loc, struct c_expr expr)
	 struct c_expr
	 c_expr_lengthof_type (location_t loc, struct c_type_name *t)
	 {
	+fprintf(stderr, "ALX: %s(): %d\n", __func__, __LINE__);
	   tree type;
	   struct c_expr ret;
	   tree type_expr = NULL_TREE;

which prints:

	$ /opt/local/gnu/gcc/lengthof/bin/gcc zero.c
	ALX: c_expr_lengthof_type(): 3531
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    0
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    0
	ALX: c_expr_lengthof_expr(): 3489
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    0
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    0
	ALX: c_expr_lengthof_expr(): 3489
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    0
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    0
	ALX: c_expr_lengthof_expr(): 3489
	ALX: is_top_array_vla(): 3465
	ALX: zero: 0
	ALX: var:    0
	ALX: var:    0
	ALX: is_top_array_vla(): 3465
	ALX: zero: 0
	ALX: var:    0
	ALX: var:    0
	ALX: c_expr_lengthof_expr(): 3489
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    1
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    1
	ALX: c_expr_lengthof_expr(): 3489
	ALX: is_top_array_vla(): 3465
	ALX: zero: 0
	ALX: var:    0
	ALX: var:    0
	ALX: is_top_array_vla(): 3465
	ALX: zero: 0
	ALX: var:    0
	ALX: var:    0
	ALX: c_expr_lengthof_expr(): 3489
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    1
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    1
	ALX: c_expr_lengthof_expr(): 3489
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    1
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    1
	ALX: c_expr_lengthof_expr(): 3489
	ALX: is_top_array_vla(): 3465
	ALX: zero: 0
	ALX: var:    0
	ALX: var:    0
	ALX: is_top_array_vla(): 3465
	ALX: zero: 0
	ALX: var:    0
	ALX: var:    0
	ALX: c_expr_lengthof_expr(): 3489
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    1
	ALX: is_top_array_vla(): 3465
	ALX: zero: 1
	ALX: var:    0
	ALX: var:    1
	zero.c:18:12: error: variably modified ‘Z’ at file scope
	   18 | static int Z[__lengthof__(Y)];
	      |            ^

If I make [0] always result in a constant expression (and thus break
some [*] cases), by doing

	-  var = var || (zero && C_TYPE_VARIABLE_SIZE (type));

Then the problem disappears.  But I'm worried that it might be hiding
the problem instead of removing it, since I don't really understand why
it's happening.  Do you know why?

Anyway, I'll remove that line to support [0].  But it would be
interesting to learn why this problem triggers.


Have a lovely night!
Alex
Alejandro Colomar Aug. 7, 2024, 10:44 p.m. UTC | #8
Hi Jens, Joseph,

On Wed, Aug 07, 2024 at 05:30:13PM GMT, Jens Gustedt wrote:
> Hi
> 
> Am 7. August 2024 17:05:48 MESZ schrieb Joseph Myers <josmyers@redhat.com>:
> > On Wed, 7 Aug 2024, Alejandro Colomar wrote:
> > 
> > > +@node Length
> > > +@section Determining the Length of Arrays
> > > +@cindex lengthof
> > > +@cindex length
> > > +@cindex array length
> > > +
> > > +The keyword @code{__lengthof__} determines the length of an array operand,
> > > +that is, the number of elements in the array.
> > > +Its syntax is just like @code{sizeof}.
> > > +The operand must be a complete array type.
> > 
> > I think you mean the operand must be *an expression whose type is a 
> > complete array type* or *a type name for a complete array type*.  The 
> > wording you have suggests only type names, you need to be clear about both 
> > kinds of operands being possible (and include examples for them).

I've written the following for v6:

-Its syntax is just like @code{sizeof}.
-The operand must be a complete array type.
+Its syntax is similar to @code{sizeof}.
+The operand must be a complete array type or an expression of that type.
+For example:
+
+@smallexample
+int a[n];
+__lengthof__ (a);  // returns n
+__lengthof__ (int [7][3]);  // returns 7
+@end smallexample
+


> > 
> > > +@smallexample
> > > +__lengthof__ (int [7][n++]);  // constexpr
> > > +__lengthof__ (int [n++][7]);  // run-time value
> > > +@end smallexample
> > 
> > I don't think using "constexpr" to mean "constant expression" is a good 
> > idea, they're different things.
> 
> It should actually state "integer constant expression", I think. the nuance is probably important

Agree.

> 
> 
> > > +void
> > > +incomplete (int p[])
> > > +{
> > > +  unsigned n;
> > > +
> > > +  n = __lengthof__ (x);  /* { dg-error "incomplete" } */
> > > +
> > > +  /* We want to support the following one in the future,
> > > +     but for now it should fail.  */
> > > +  n = __lengthof__ (p);  /* { dg-error "invalid" } */
> > 
> > This seems to be the only test you have for a non-array operand.  I'd 
> > expect such tests (both for type name operands and for expression 
> > operands) covering cases that we *don't* want to support in future, not 
> > just this one that we would like to be supportable in future.
> > 
> > I don't see any tests for the constraints on external definitions from 
> > 6.9.1 that we discussed - that referenced to undefined internal linkage 
> > identifiers are OK inside __lengthof__ returning a constant (both 
> > constant-length arrays of non-VLA and constant-length arrays of VLA) but 
> > not in the cases where __lengthof__ is evaluated.
> > 

I think I've added them for v6.  (Please let me know if anything is
still untested there.)  I'll publish v6 after I test for regressions.

Have a lovely night!
Alex
Jens Gustedt Aug. 8, 2024, 5:35 a.m. UTC | #9
Hello Alejandro,

On Thu, 8 Aug 2024 00:44:02 +0200, Alejandro Colomar wrote:

> +Its syntax is similar to @code{sizeof}.

Out of curiosity, do you also make the same distinction, namely that
with expressions you may omit the parentheses?

I wouldn't be sure that we should continue that distinction from
`sizeof`. Also that prefix variant would be difficult to wrap in a
`lengthof` macro (without underscores) as we would probably like to
have it in the end.

Thanks
Jₑₙₛ
Martin Uecker Aug. 8, 2024, 7:39 a.m. UTC | #10
Am Donnerstag, dem 08.08.2024 um 00:09 +0200 schrieb Alejandro Colomar:
> Hi Martin,
> > 
...

> > > 
> > > I would personally prefer supporting [0], and consider that not
> > > supporting [*] is a bug in the implementation of [*] (and thus not my
> > > problem).
> > > 
> > > However, since GCC doesn't support 0-length arrays, I'm not sure that
> > > would be correct.
> > > 
> > > What do you think?
> > 
> > I think the logic in your patch is OK as is.  It does not exactly
> > what you want, as it now treats some [0] as [*] but I would not
> > make the logic more complex here when we will fix it properly
> > anyway.
> 
> I'm detecting some issues with my patches.
> 
> 	$ cat zero.c
> 	static int A[__lengthof__(int [0])];
> 	static int B[__lengthof__(A)];
> 
> 	static int C[0];
> 	static int D[__lengthof__(C)];
> 
> 	void fa(char (*a)[3][*], int (*x)[__lengthof__(*a)]);  // x: array
> 	void fb(char (*a)[*][3], int (*x)[__lengthof__(*a)]);  // x: vla
> 	void fc(char (*a)[3], int (*x)[__lengthof__(*a)]);  // x: array
> 	void fd(char (*a)[0], int (*x)[__lengthof__(*a)]);  // x: ?
> 	void fe(char (*a)[*], int (*x)[__lengthof__(*a)]);  // x: vla
> 	void ff(char (*a)[*], int (*x)[*]);  // x: array
> 
> 
> 	static int W[1];
> 	static int X[__lengthof__(W)];
> 	static int Y[0];
> 	static int Z[__lengthof__(Y)];
> 
> 	$ /opt/local/gnu/gcc/lengthof/bin/gcc zero.c
> 	zero.c:18:12: error: variably modified ‘Z’ at file scope
> 	   18 | static int Z[__lengthof__(Y)];
> 	      |            ^
> 
> 
> See that D, which is identical to Z, does not cause an error.
> There's one case of [0] resulting in a constant expression, and another
> in a VLA.  Can you please help investigate why it's happening?

This seems to be another bug where we incorrectly set
C_TYPE_VARIABLE_SIZE and this also affects sizeof:

https://godbolt.org/z/a8Ej6c5jr

Strangely it seems related to the function declaration
with the unspecified size before.  I will look into this,
I am just working on some checking functions that make sure
that those bits are consistent all the time because I also
missed some cases where I need to set C_TYPE_VARIABLY_MODIFIED.

I filed a new bug:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116284

...

> 	      |            ^
> 
> If I make [0] always result in a constant expression (and thus break
> some [*] cases), by doing
> 
> 	-  var = var || (zero && C_TYPE_VARIABLE_SIZE (type));
> 
> Then the problem disappears.  But I'm worried that it might be hiding
> the problem instead of removing it, since I don't really understand why
> it's happening.  Do you know why?
> 
> Anyway, I'll remove that line to support [0].  But it would be
> interesting to learn why this problem triggers.

You need the line to support variable size arrays.  Please just comment out
your test with a reference to the bug for now and I will try to fix this ASAP.

Martin



> Alex
>
Alejandro Colomar Aug. 8, 2024, 8:26 a.m. UTC | #11
Hello Jens,

On Thu, Aug 08, 2024 at 07:35:12AM GMT, Jₑₙₛ Gustedt wrote:
> Hello Alejandro,
> 
> On Thu, 8 Aug 2024 00:44:02 +0200, Alejandro Colomar wrote:
> 
> > +Its syntax is similar to @code{sizeof}.
> 
> For my curiosity, do you also make the same distinction that with
> expressions you may omit the parenthesis?

I thought of it.  TBH, I haven't tested that thoroughly.

In principle, I have implemented it in the same way as sizeof, yes.

Personally, I would have never allowed sizeof without parentheses, but I
understand there are people who think the parentheses hurt readability,
so I kept it in the same way.

I'm not sure why the parentheses are necessary with type names in
sizeof, but to maintain expectations, I think it would be better to do
the same here.

> 
> I wouldn't be sure that we should continue that distinction from
> `sizeof`.

But then, what do we do?  Allow lengthof with type names without parens?
Or require parens?  I'm not comfortable with that choice.

> Also that prefix variant would be difficult to wrap in a
> `lengthof` macro (without underscores) as we would probably like to
> have it in the end.

Do you mean that I should add _Lengthof?  We're adding __lengthof__ to
be a GNU extension with relative freedom from ISO.  If I sent a patch
adding _Lengthof, we'd have to send a proposal to ISO at the same time,
and we'd be waiting for ISO to discuss it before I can merge it.  And we
couldn't bring prior art to ISO.

With this approach instead, the plan is:

-  Merge __lengthof__ in GCC before ISO hears of it (well, there are
   already several WG14 members in this discussion, so you have actually
   heard of it, but we're free to do more or less what we want).

-  Propose _Lengthof to ISO C, with prior art in GCC as __lengthof__,
   proposing the same semantics.  Also propose a lengthof macro defined
   in <stdlength.h>

-  When ISO C accepts _Lengthof and lengthof, map _Lengthof in GCC to
   the same internals as __lengthof__, so they are the same thing.

Still, I'm interested in having some feedback from WG14, to prevent
implementing something that will have modifications when merged to
ISO C, so please CC anyone interested from WG14, if you know of any.

Have a lovely day!
Alex
Alejandro Colomar Aug. 8, 2024, 8:42 a.m. UTC | #12
Hi Martin,

On Thu, Aug 08, 2024 at 09:39:59AM GMT, Martin Uecker wrote:
> > 	$ /opt/local/gnu/gcc/lengthof/bin/gcc zero.c
> > 	zero.c:18:12: error: variably modified ‘Z’ at file scope
> > 	   18 | static int Z[__lengthof__(Y)];
> > 	      |            ^
> > 
> > 
> > See that D, which is identical to Z, does not cause an error.
> > There's one case of [0] resulting in a constant expression, and another
> > in a VLA.  Can you please help investigate why it's happening?
> 
> This seems to be another bug where we incorrectly set
> C_TYPE_VARIABLE_SIZE and this also affects sizeof:
> 
> https://godbolt.org/z/a8Ej6c5jr
> 
> Strangely it seems related to the function declaration
> with the unspecified size before.  I will look into this,
> I am just working on some checking functions that make sure
> that those bits are consistent all the time because I also
> missed some cases where I need to set C_TYPE_VARIABLY_MODIFIED
> 
> I filed a new bug:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116284

Huh, that's obscure!  Thanks!  :-)

> 
> ...
> 
> > 	      |            ^
> > 
> > If I make [0] always result in a constant expression (and thus break
> > some [*] cases), by doing
> > 
> > 	-  var = var || (zero && C_TYPE_VARIABLE_SIZE (type));
> > 
> > Then the problem disappears.  But I'm worried that it might be hiding
> > the problem instead of removing it, since I don't really understand why
> > it's happening.  Do you know why?
> > 
> > Anyway, I'll remove that line to support [0].  But it would be
> > interesting to learn why this problem triggers.
> 
> You need the line to support variable size arrays.

Not really.  'zero' is only true for [0] and for [*], but not for
[zero], right?  

All vla tests seem to pass if I remove that line.  The only issue will
be that

	void f(char (*a)[*], int (*x)[__lengthof__(*a)]);

will result in 'int (*x)[0]' until you change the implementation of [*],
but I think we can live with that small detail.

> Please just  uncomment
> your test with a reference to the bug for now and I will try fix this ASAP.

I'll send v6 in a moment; feel free to insist on this if you disagree
after seeing it, but I think it works well without the line.

> 
> Martin

Cheers,
Alex
Jens Gustedt Aug. 8, 2024, 9:13 a.m. UTC | #13
Hi

Am 8. August 2024 10:26:14 MESZ schrieb Alejandro Colomar <alx@kernel.org>:
> Hello Jens,
> 
> On Thu, Aug 08, 2024 at 07:35:12AM GMT, Jₑₙₛ Gustedt wrote:
> > Hello Alejandro,
> > 
> > On Thu, 8 Aug 2024 00:44:02 +0200, Alejandro Colomar wrote:
> > 
> > > +Its syntax is similar to @code{sizeof}.
> > 
> > For my curiosity, do you also make the same distinction that with
> > expressions you may omit the parenthesis?
> 
> I thought of it.  TBH, I haven't tested that thoroughly.
> 
> In principle, I have implemented it in the same way as sizeof, yes.
> 
> Personally, I would have never allowed sizeof without parentheses, but I
> understand there are people who think the parentheses hurt readability,
> so I kept it in the same way.
> 
> I'm not sure why the parentheses are necessary with type names in
> sizeof,

Probably because of operator precedence.  There would be no rule that tells us where sizeof ends and where we'd switch back from parsing a type to parsing an expression.


> but to maintain expectations, I think it would be better to do
> the same here.

Just to compare, the recent additions in C23 (typeof etc.) only have the parenthesized versions.  So there would be precedent.  And it really eases the transition.


> > 
> > I wouldn't be sure that we should continue that distinction from
> > `sizeof`.
> 
> But then, what do we do?  Allow lengthof with type names without parens?
> Or require parens?  I'm not comfortable with that choice.
> 
> > Also that prefix variant would be difficult to wrap in a
> > `lengthof` macro (without underscores) as we would probably like to
> > have it in the end.
> 
> Do you mean that I should add _Lengthof?  We're adding __lengthof__ to
> be a GNU extension with relative freedom from ISO.  If I sent a patch
> adding _Lengthof, we'd have to send a proposal to ISO at the same time,
> and we'd be waiting for ISO to discuss it before I can merge it.  And we
> couldn't bring prior art to ISO.
> 
> With this approach instead, the plan is:
> 
> -  Merge __lengthof__ in GCC before ISO hears of it (well, there are
>    already several WG14 members in this discussion, so you have actually
>    heard of it, but we're free to do more or less what we want).
> 
> -  Propose _Lengthof to ISO C, with prior art in GCC as __lengthof__,
>    proposing the same semantics.  Also propose a lengthof macro defined
>    in <stdlength.h>

I don't really see why we should take a detour via _Lengthof; I would hope we could directly propose lengthof for standardization.

> -  When ISO C accepts _Lengthof and lengthof, map _Lengthof in GCC to
>    the same internals as __lengthof__, so they are the same thing.
> 
> Still, I'm interested in having some feedback from WG14, to prevent
> implementing something that will have modifications when merged to
> ISO C, so please CC anyone interested from WG14, if you know of any.

I think that more important would be to have clang on board with this.

In any case, thanks for doing this!

Jens
Martin Uecker Aug. 8, 2024, 9:23 a.m. UTC | #14
Am Donnerstag, dem 08.08.2024 um 10:42 +0200 schrieb Alejandro Colomar:
> > 
> > ...
> > 
> > > 	      |            ^
> > > 
> > > If I make [0] always result in a constant expression (and thus break
> > > some [*] cases), by doing
> > > 
> > > 	-  var = var || (zero && C_TYPE_VARIABLE_SIZE (type));
> > > 
> > > Then the problem disappears.  But I'm worried that it might be hiding
> > > the problem instead of removing it, since I don't really understand why
> > > it's happening.  Do you know why?
> > > 
> > > Anyway, I'll remove that line to support [0].  But it would be
> > > interesting to learn why this problem triggers.
> > 
> > You need the line to support variable size arrays.
> 
> > Not really.  'zero' is only true for [0] and for [*], but not for
> [zero], right?  
> 
> All vla tests seem to pass if I remove that line.  The only issue will
> be that
> 
> 	void f(char (*a)[*], int (*x)[__lengthof__(*a)]);
> 
> will result in 'int (*x)[0]' until you change the implementation of [*],
> but I think we can live with that small detail.


I plan to change the representation of [0], so it would be nice if the
[*] cases are correct as much as possible so that they do not get forgotten
later.

Martin

> 
> > Please just  uncomment
> > your test with a reference to the bug for now and I will try to fix this ASAP.
> 
> I'll send v6 in a moment; feel free to insist in this if you disagree
> after seeing it, but I think it works well without the line.
> 
> > 
> > Martin
> 
> Cheers,
> Alex
>
Alejandro Colomar Aug. 8, 2024, 9:25 a.m. UTC | #15
Hi Jens,

On Thu, Aug 08, 2024 at 11:13:02AM GMT, Jens Gustedt wrote:
> > but to maintain expectations, I think it would be better to do
> > the same here.
> 
> Just to compare, the recent additions in C23 typeof etc. only have the
> parenthesized versions. So there would be precedent. And it really
> eases transition

Hmmm, interesting.

The good part of reusing sizeof syntax is that I can reuse internal code
for sizeof.  But I'll check if I can change it easily to only support
parens.

> > > I wouldn't be sure that we should continue that distinction from
> > > `sizeof`.
> > 
> > But then, what do we do?  Allow lengthof with type names without parens?
> > Or require parens?  I'm not comfortable with that choice.
> > 
> > > Also that prefix variant would be difficult to wrap in a
> > > `lengthof` macro (without underscores) as we would probably like to
> > > have it in the end.
> > 
> > Do you mean that I should add _Lengthof?  We're adding __lengthof__ to
> > be a GNU extension with relative freedom from ISO.  If I sent a patch
> > adding _Lengthof, we'd have to send a proposal to ISO at the same time,
> > and we'd be waiting for ISO to discuss it before I can merge it.  And we
> > couldn't bring prior art to ISO.
> > 
> > With this approach instead, the plan is:
> > 
> > -  Merge __lengthof__ in GCC before ISO hears of it (well, there are
> >    already several WG14 members in this discussion, so you have actually
> >    heard of it, but we're free to do more or less what we want).
> > 
> > -  Propose _Lengthof to ISO C, with prior art in GCC as __lengthof__,
> >    proposing the same semantics.  Also propose a lengthof macro defined
> >    in <stdlength.h>
> 
> I don't really see why we should take a detour via _Lengthof, I would
> hope we could directly propose lengthof as the standardization

Hmmm, maybe programs already use lengthof for some other purpose.
Hopefully not, but I don't know.  In any case, I'm fine with both
approaches.

> > -  When ISO C accepts _Lengthof and lengthof, map _Lengthof in GCC to
> >    the same internals as __lengthof__, so they are the same thing.
> > 
> > Still, I'm interested in having some feedback from WG14, to prevent
> > implementing something that will have modifications when merged to
> > ISO C, so please CC anyone interested from WG14, if you know of any.
> 
> I think that more important would be to have clang on board with this.

Does anyone have any Clang maintainer in mind that would be interested
in being CCed?  If so, please let me know (and/or add it yourselves).

> 
> In any case, thanks for doing this!

:-)

Cheers,
Alex
Alejandro Colomar Aug. 8, 2024, 9:36 a.m. UTC | #16
On Thu, Aug 08, 2024 at 11:23:51AM GMT, Martin Uecker wrote:
> > Not really.  'zero' is only true for [0] and for [*], but not for
> > [zero], right?  
> > 
> > All vla tests seem to pass if I remove that line.  The only issue will
> > be that
> > 
> > 	void f(char (*a)[*], int (*x)[__lengthof__(*a)]);
> > 
> > will result in 'int (*x)[0]' until you change the implementation of [*],
> > but I think we can live with that small detail.
> 
> 
> I plan to change the representation of [0], so it would be nice if the
> [*] cases are correct as much as possible so that they do not get forgotten
> later.

Ahhh, thanks!  Will do, then.

> 
> Martin

Cheers,
Alex
Joseph Myers Aug. 8, 2024, 11:28 a.m. UTC | #17
On Thu, 8 Aug 2024, Alejandro Colomar wrote:

> Hi Jens,
> 
> On Thu, Aug 08, 2024 at 11:13:02AM GMT, Jens Gustedt wrote:
> > > but to maintain expectations, I think it would be better to do
> > > the same here.
> > 
> > Just to compare, the recent additions in C23 typeof etc. only have the
> > parenthesized versions. So there would be precedent. And it really
> > eases transition
> 
> Hmmm, interesting.
> 
> The good part of reusing sizeof syntax is that I can reuse internal code
> for sizeof.  But I'll check if I can change it easily to only support
> parens.

Since typeof produces a type, it's used in different syntactic contexts 
from sizeof, so has different ambiguity issues, and requiring parentheses 
with typeof is not relevant to sizeof/lengthof.  I think lengthof should 
follow sizeof.  Make sure there's a testcase for lengthof applied to a 
compound literal (the case that illustrates how, on parsing sizeof 
(type-name), the compiler needs to see what comes after (type-name) to 
determine whether it's actually sizeof applied to an expression (if '{' 
follows) or to a type (otherwise)).  (If you're following the sizeof 
implementation closely enough, this should just work.)
Jens Gustedt Aug. 8, 2024, 2:56 p.m. UTC | #18
Am 8. August 2024 13:28:57 MESZ schrieb Joseph Myers <josmyers@redhat.com>:
> On Thu, 8 Aug 2024, Alejandro Colomar wrote:
> 
> > Hi Jens,
> > 
> > On Thu, Aug 08, 2024 at 11:13:02AM GMT, Jens Gustedt wrote:
> > > > but to maintain expectations, I think it would be better to do
> > > > the same here.
> > > 
> > > Just to compare, the recent additions in C23 typeof etc. only have the
> > > parenthesized versions. So there would be precedent. And it really
> > > eases transition
> > 
> > Hmmm, interesting.
> > 
> > The good part of reusing sizeof syntax is that I can reuse internal code
> > for sizeof.  But I'll check if I can change it easily to only support
> > parens.
> 
> Since typeof produces a type, it's used in different syntactic contexts 
> from sizeof, so has different ambiguity issues, and requiring parentheses 
> with typeof is not relevant to sizeof/lengthof.  I think lengthof should 
> follow sizeof.  Make sure there's a testcase for lengthof applied to a 
> compound literal (the case that illustrates how, on parsing sizeof 
> (type-name), the compiler needs to see what comes after (type-name) to 
> determine whether it's actually sizeof applied to an expression (if '{' 
> follows) or to a type (otherwise)).  (If you're following the sizeof 
> implementation closely enough, this should just work.)
> 
> -- 
> Joseph S. Myers
> josmyers@redhat.com
> 

Hi, 
I am not convinced that we should introduce the same syntax weirdness
for this feature. sizeof seems to be the only place in the core language
where a keyword is used as an operator in expressions, and
that does not resemble function-call notation. In particular your 
example with compound literals shows that we could avoid syntax look-ahead 
by not doing this. (People argued violently against look-ahead when we discussed possible inclusion of lambdas into C23)

We don't have to repeat all historic accidents when inventing a new feature.
Sure that gcc may invent anything to their liking, but when and if we pass this
for standardisation we will give such considerations a careful look.

Jens
Martin Uecker Aug. 8, 2024, 3:42 p.m. UTC | #19
Am Donnerstag, dem 08.08.2024 um 16:56 +0200 schrieb Jens Gustedt:
> Am 8. August 2024 13:28:57 MESZ schrieb Joseph Myers <josmyers@redhat.com>:
> > On Thu, 8 Aug 2024, Alejandro Colomar wrote:
> > 
> > > Hi Jens,
> > > 
> > > On Thu, Aug 08, 2024 at 11:13:02AM GMT, Jens Gustedt wrote:
> > > > > but to maintain expectations, I think it would be better to do
> > > > > the same here.
> > > > > 
> > > > 
> > > > Just to compare, the recent additions in C23 typeof etc. only have the
> > > > parenthesized versions. So there would be precedent. And it really
> > > > eases transition
> > > > 
> > > Hmmm, interesting.
> > > 
> > > The good part of reusing sizeof syntax is that I can reuse internal code
> > > for sizeof. But I'll check if I can change it easily to only support
> > > parens.
> > > 
> > 
> > Since typeof produces a type, it's used in different syntactic contexts 
> > from sizeof, so has different ambiguity issues, and requiring parentheses 
> > with typeof is not relevant to sizeof/lengthof. I think lengthof should 
> > follow sizeof. Make sure there's a testcase for lengthof applied to a 
> > compound literal (the case that illustrates how, on parsing sizeof 
> > (type-name), the compiler needs to see what comes after (type-name) to 
> > determine whether it's actually sizeof applied to an expression (if '{' 
> > follows) or to a type (otherwise)). (If you're following the sizeof 
> > implementation closely enough, this should just work.)

> Hi, 
> I am not convinced that we should introduce the same syntax weirdness
> for this feature. sizeof seems to be the only place in the core language
> where a keyword is used as an operator in expressions, and
> that does not resemble function-call notation. In particular your 
> example with compound literals shows that we could avoid syntax look-ahead 
> by not doing this.

It is the other way around: With the "(" there is the ambiguity
whether this starts a compound literal or a type name enclosed
in parentheses.  But this is not problematic for parsing.

Martin


> (People argued violently against look-ahead when we discussed
> possible inclusion of lambdas into C23)

> We don't have to repeat all historic accidents when inventing a new feature.
> Sure that gcc may invent anything to their liking, but when and if we pass this
> for standardisation we will give such considerations a careful look.

> Jens
Jens Gustedt Aug. 8, 2024, 3:56 p.m. UTC | #20
Am 8. August 2024 17:42:54 MESZ schrieb Martin Uecker <uecker@tugraz.at>:
> Am Donnerstag, dem 08.08.2024 um 16:56 +0200 schrieb Jens Gustedt:
> > Am 8. August 2024 13:28:57 MESZ schrieb Joseph Myers <josmyers@redhat.com>:
> > > On Thu, 8 Aug 2024, Alejandro Colomar wrote:
> > > 
> > > > Hi Jens,
> > > > 
> > > > On Thu, Aug 08, 2024 at 11:13:02AM GMT, Jens Gustedt wrote:
> > > > > > but to maintain expectations, I think it would be better to do
> > > > > > the same here.
> > > > > > 
> > > > > 
> > > > > Just to compare, the recent additions in C23 typeof etc. only have the
> > > > > parenthesized versions. So there would be precedent. And it really
> > > > > eases transition
> > > > > 
> > > > Hmmm, interesting.
> > > > 
> > > > The good part of reusing sizeof syntax is that I can reuse internal code
> > > > for sizeof. But I'll check if I can change it easily to only support
> > > > parens.
> > > > 
> > > 
> > > Since typeof produces a type, it's used in different syntactic contexts 
> > > from sizeof, so has different ambiguity issues, and requiring parentheses 
> > > with typeof is not relevant to sizeof/lengthof. I think lengthof should 
> > > follow sizeof. Make sure there's a testcase for lengthof applied to a 
> > > compound literal (the case that illustrates how, on parsing sizeof 
> > > (type-name), the compiler needs to see what comes after (type-name) to 
> > > determine whether it's actually sizeof applied to an expression (if '{' 
> > > follows) or to a type (otherwise)). (If you're following the sizeof 
> > > implementation closely enough, this should just work.)
> 
> > Hi, 
> > I am not convinced that we should introduce the same syntax weirdness
> > for this feature. sizeof seems to be the only place in the core language
> > where a keyword is used as an operator in expressions, and
> > that does not resemble function-call notation. In particular your 
> > example with compound literals shows that we could avoid syntax look-ahead 
> > by not doing this.
> 
> It is the other way around: With the "(" there is the ambiguity
> whether this starts a compound literal or a type name enclosed
> in parentheses.  But this is not problematic for parsing.

No, the ambiguity is there because the first ( after the keyword could start either a type in parentheses or an expression, and among these a compound literal. If that first parenthesis would be part of the construct (as for the typeof or offsetof constructs) there would be no ambiguity and the only look-ahead would be balanced parenthesis parsing.

And just because there is "no problem"
because we learned to deal with this weirdness, it still doesn't mean we have to write an inconsistency forward for which we don't even remember why we have it.


> 
> Martin
> 
> 
> > (People argued violently against look-ahead when we discussed
> > possible inclusion of lambdas into C23)
> 
> > We don't have to repeat all historic accidents when inventing a new feature.
> > Sure that gcc may invent anything to their liking, but when and if we pass this
> > for standardisation we will give such considerations a careful look.
> 
> > Jens
>
Joseph Myers Aug. 8, 2024, 4:08 p.m. UTC | #21
On Thu, 8 Aug 2024, Jens Gustedt wrote:

> No, the ambiguity is there because the first ( after the keyword could 
> start either a type in parenthesis or an expression, and among these a 
> compound literal. If that first parenthesis would be part of the 
> construct (as for the typeof or offsetof constructs) there would be no 
> ambiguity and the only look ahead would be balanced parenthesis parsing.

I don't consider this ambiguity / unbounded lookahead in any problematic 
sense.  There are the following cases for sizeof:

* Not followed by '(': sizeof unary-expression.

* Followed by '(' then a token that does not start a type-name: sizeof 
unary-expression.

* Followed by '(' then a token that does start a type-name: sizeof 
(type-name) later-tokens, where if later-tokens start with '{' then it's 
sizeof unary-expression and otherwise it's sizeof (type-name).

The last case is not problematic because the parsing of the type-name 
doesn't depend at all on what comes after it; it's parsed exactly the same 
whether it's part of sizeof (type-name) or a compound literal.  
Fundamentally this is exactly the same as if a cast-expression starts with 
(type-name): until the end of the type name, you don't know whether it's a 
cast, or whether the cast-expression is actually a unary-expression which 
is a postfix-expression which is a compound-literal.  In both cases, the 
parsing of a compound-literal is entered only after the initial 
(type-name) has been seen, because until after the (type-name) it's not 
known which construct is being parsed.
Jens Gustedt Aug. 8, 2024, 4:23 p.m. UTC | #22
As said, even if we don't consider this problematic because we are used to the mildly complex case distinction that you just exposed over several paragraphs, it doesn't mean that we should do it, nor does it mean that it would be beneficial for our users or for other implementations that would like to follow. 

And also as said, all other features in the standard, being types, typeof, or expressions, e.g. offsetof, unreachable or other gnu extensions, don't have nor need this kind of syntax.

We should be designing features for the future, not the past

Jens
Martin Uecker Aug. 8, 2024, 4:30 p.m. UTC | #23
Am Donnerstag, dem 08.08.2024 um 18:23 +0200 schrieb Jens Gustedt:
> As said, even if we don't consider this problematic because we are used to the mildly complex case distinction that you just exposed over several paragraphs, it doesn't mean that we should
> do it, nor does it mean that it would be beneficial for our users or for other implementations that would like to follow. 
> 
> And also as said, all other features in the standard, being types, typeof, or expressions, e.g. offsetof, unreachable or other gnu extensions, don't have nor need this kind of syntax.
> 
> We should be designing features for the future, not the past


While not problematic for parsing, I see now how the grammar becomes
better if we eliminated this quirk. Thanks!

But we should then deprecate this for sizeof too.


Martin


> 
> Jens
Alejandro Colomar Aug. 8, 2024, 5:01 p.m. UTC | #24
Hi Martin, Jens, Joseph,

On Thu, Aug 08, 2024 at 06:30:42PM GMT, Martin Uecker wrote:
> Am Donnerstag, dem 08.08.2024 um 18:23 +0200 schrieb Jens Gustedt:
> > As said, even if we don't consider this problematic because we are used to the mildly complex case distinction that you just exposed over several paragraphs, it doesn't mean that we should
> > do it, nor does it mean that it would be beneficial for our users or for other implementations that would like to follow. 
> > 
> > And also as said, all other features in the standard, being types, typeof, or expressions, e.g. offsetof, unreachable or other gnu extensions, don't have nor need this kind of syntax.
> > 
> > We should be designing features for the future, not the past
> 
> 
> While not problematic for parsing, I see now how the grammar becomes
> better if we eliminated this quirk. Thanks!
> 
> But we should then deprecate this for sizeof too.

How about having __lengthof__ behave like sizeof, but deprecate it in
sizeof too?

ISO C could accept only lengthof() with parens, and we could have it
without them as a deprecated-on-arrival GNU extension.

And then remove it from both at some point in the future.

We could start by adding a -Wall warning for sizeof without parens, and
promote it to an error a few versions later.

Have a lovely day!
Alex

P.S.:  I'm doing a whole-tree update to use __lengthof__ instead of
open-coded sizeof divisions or macros based on it, and I've found several
bugs already.  I'll use this change to test the new operator in the
entire code base, which should result in no regressions at all.  That
would be an interesting test suite.  :)

However, I advance that it will be painful to review that patch.
David Brown Aug. 8, 2024, 5:21 p.m. UTC | #25
On 08/08/2024 11:13, Jens Gustedt wrote:
> Hi
> 
> Am 8. August 2024 10:26:14 MESZ schrieb Alejandro Colomar <alx@kernel.org>:
>> Hello Jens,
>>
>> On Thu, Aug 08, 2024 at 07:35:12AM GMT, Jₑₙₛ Gustedt wrote:
>>> Hello Alejandro,
>>>
>>> On Thu, 8 Aug 2024 00:44:02 +0200, Alejandro Colomar wrote:
>>>
>>>> +Its syntax is similar to @code{sizeof}.
>>>
>>> For my curiosity, do you also make the same distinction that with
>>> expressions you may omit the parenthesis?
>>
>> I thought of it.  TBH, I haven't tested that thoroughly.
>>
>> In principle, I have implemented it in the same way as sizeof, yes.
>>
>> Personally, I would have never allowed sizeof without parentheses, but I
>> understand there are people who think the parentheses hurt readability,
>> so I kept it in the same way.
>>
>> I'm not sure why the parentheses are necessary with type names in
>> sizeof,
> 
> probably because of operator precedence. there would be no rule that tells us where sizeof ends and we'd switch back from parsing a type to parsing an expression
> 

I personally have always found it looks odd that the sizeof operator 
does not always need parentheses - I suppose that is because it is a 
word, rather than punctuation.  To me, it looks more like a function or 
function-like macro.  And I'd view lengthof in the same light.  However, 
that's just personal opinion, not a rational argument!

> 
>> but to maintain expectations, I think it would be better to do
>> the same here.
> 
> Just to compare, the recent additions in C23 typeof etc. only have the parenthesized versions. So there would be precedent. And it really eases transition
> 

_Alignof (now "alignof") from C11 always needs parentheses too - but it 
always applies to a type, not an expression.  (I think it should also be 
possible to use it with expressions for consistency, but that's another 
matter.)

As I see it, there is a good reason to say that a "lengthof" feature 
should always have parentheses.  With "typeof" (either as the gcc 
extension or the C23 feature), you can come a long way to the 
functionality of the proposed "lengthof" (or "__lengthof__") using a 
macro.  This will mean that if someone writes code using the new feature 
in gcc, and another person wants to compile the code with older gcc or a 
different compiler, they can use a macro (even "#define lengthof(arr) 
(sizeof(arr)/sizeof((arr)[0]))", which is less safe but works everywhere)
instead.  But that is only true if the person writing the original 
"lengthof" code has included the parentheses.

> 
>>>
>>> I wouldn't be sure that we should continue that distinction from
>>> `sizeof`.
>>
>> But then, what do we do?  Allow lengthof with type names without parens?
>> Or require parens?  I'm not comfortable with that choice.
>>
>>> Also that prefix variant would be difficult to wrap in a
>>> `lengthof` macro (without underscores) as we would probably like to
>>> have it in the end.
>>
>> Do you mean that I should add _Lengthof?  We're adding __lengthof__ to
>> be a GNU extension with relative freedom from ISO.  If I sent a patch
>> adding _Lengthof, we'd have to send a proposal to ISO at the same time,
>> and we'd be waiting for ISO to discuss it before I can merge it.  And we
>> couldn't bring prior art to ISO.
>>
>> With this approach instead, the plan is:
>>
>> -  Merge __lengthof__ in GCC before ISO hears of it (well, there are
>>     already several WG14 members in this discussion, so you have actually
>>     heard of it, but we're free to do more or less what we want).
>>
>> -  Propose _Lengthof to ISO C, with prior art in GCC as __lengthof__,
>>     proposing the same semantics.  Also propose a lengthof macro defined
>>     in <stdlength.h>
> 
> I don't really see why we should take a detour via _Lengthof, I would hope we could directly propose lengthof as the standardization
> 

It is traditional for C.  It has taken until C23 to get alignof, bool, 
etc., as full keywords.  I would expect that we would have _Lengthof for 
a transitional period while "lengthof" is in "<stdlength.h>" and other 
uses of it are deprecated.  Changes in C happen slowly if backwards 
compatibility is threatened (too slowly for some people, too fast for 
others).

>> -  When ISO C accepts _Lengthof and lengthof, map _Lengthof in GCC to
>>     the same internals as __lengthof__, so they are the same thing.
>>
>> Still, I'm interested in having some feedback from WG14, to prevent
>> implementing something that will have modifications when merged to
>> ISO C, so please CC anyone interested from WG14, if you know of any.
> 
> I think that more important would be to have clang on board with this.
> 
> In any case, thanks for doing this!
> 
> Jens
> 
>
Joseph Myers Aug. 8, 2024, 5:31 p.m. UTC | #26
On Thu, 8 Aug 2024, Alejandro Colomar wrote:

> How about having __lengthof__ behave like sizeof, but deprecate it in
> sizeof too?

Deprecation would be a matter for WG14.

> We could start by adding a -Wall warning for sizeof without parens, and
> promote it to an error a few versions later.

This is very far outside the scope of -Wall.  There is nothing confusing 
for the programmer about sizeof without parentheses and no likelihood that 
the programmer meant something other than the semantics of the code.

GCC should not be opinionated about promoting personal ideas of what is or 
is not good style or what might or might not be a future language feature; 
it should support a wide range of different programming styles.  The 
threshold for warning about something in -Wall (or -Wextra) should be much 
higher than "the language design would be simpler without this feature".

> P.S.:  I'm doing a whole-tree update to use __lengthof__ instead of
> open-coded sizeof divisions or macros based on it, and I've found several
> bugs already.  I'll use this change to test the new operator in the
> entire code base, which should result in no regressions at all.  That
> would be an interesting test suite.  :)

I think the code base (code on the host is generally in C++) should be 
readable to people who know C++ (C++11 is the documented requirement for 
building GCC - we're very conservative about adopting new language 
versions, to facilitate bootstrapping on a wide range of systems) as it 
is, not a playground for trying out new language features.  We have enough 
GCC-specific versions of standard features as it is (e.g. the GCC-specific 
vectors designed to interoperate with GCC's garbage collection), using a 
new feature that doesn't add expressivity and isn't in any standard C++ 
version doesn't seem like a good idea to me.

Actual bugs should of course be fixed.  But certainly standard features 
are preferable to something specific to GCC, and existing macros in GCC 
such as ARRAY_SIZE that people are at least familiar with are preferable 
to introducing a new language feature.

*If* the feature were adopted into C++26, we could then consider if 
existing macros should be renamed to look more like the future language 
feature.

Target code is at least always compiled with the same version of GCC, but 
it still shouldn't be a playground for new language features; that doesn't 
help readability, backporting patches to versions without the features, 
etc.
Alejandro Colomar Aug. 8, 2024, 6:04 p.m. UTC | #27
Hi Joseph,

On Thu, Aug 08, 2024 at 05:31:05PM GMT, Joseph Myers wrote:
> On Thu, 8 Aug 2024, Alejandro Colomar wrote:
> 
> > How about having __lengthof__ behave like sizeof, but deprecate it in
> > sizeof too?
> 
> Deprecation would be a matter for WG14.

Yep; I wouldn't add it to -Wall unless WG14 decides to deprecate it
first.  But if it does, that could be the path.  For lengthof, I think
keeping it like sizeof would be the simplest, as an implementer.  And
users will probably not care too much.  And if WG14 decides to deprecate
it from sizeof, they can also deprecate it from lengthof at the same
time.

> I think the code base (code on the host is generally in C++) should be 
> readable to people who know C++ (C++11 is the documented requirement for 
> building GCC - we're very conservative about adopting new language 
> versions, to facilitate bootstrapping on a wide range of systems) as it 
> is, not a playground for trying out new language features.  We have enough 
> GCC-specific versions of standard features as it is (e.g. the GCC-specific 
> vectors designed to interoperate with GCC's garbage collection), using a 
> new feature that doesn't add expressivity and isn't in any standard C++ 
> version doesn't seem like a good idea to me.
> 
> Actual bugs should of course be fixed.  But certainly standard features 
> are preferable to something specific to GCC, and existing macros in GCC 
> such as ARRAY_SIZE that people are at least familiar with are preferable 
> to introducing a new language feature.

ARRAY_SIZE() is very rarely used.  From what I've seen, most of the
existing code uses the raw sizeof division, and there's a non-negligible
amount of typos in those.

I suggest that someone at least converts most or all calls to
ARRAY_SIZE(), so that it can later easily be changed to lengthof().

I can provide my patch as a draft, so that it's just adding some include
and s/__lengthof__/ARRAY_SIZE/, plus some whitespace and parens fixes.

> 
> *If* the feature were adopted into C++26, we could then consider if 
> existing macros should be renamed to look more like the future language 
> feature.
> 
> Target code is at least always compiled with the same version of GCC, but 
> it still shouldn't be a playground for new language features; that doesn't 
> help readability, backporting patches to versions without the features, 
> etc.

It will serve me as a huge test suite anyway; so it's worth it even if
just for myself.  And it will uncover bugs.  :)

Thanks!

Have a lovely day!
Alex
Martin Uecker Aug. 8, 2024, 6:16 p.m. UTC | #28
Am Donnerstag, dem 08.08.2024 um 20:04 +0200 schrieb Alejandro Colomar:

> 
...
> > 
> > *If* the feature were adopted into C++26, we could then consider if 
> > existing macros should be renamed to look more like the future language 
> > feature.
> > 
> > Target code is at least always compiled with the same version of GCC, but 
> > it still shouldn't be a playground for new language features; that doesn't 
> > help readability, backporting patches to versions without the features, 
> > etc.
> 
> It will serve me as a huge test suite anyway; so it's worth it even if
> just for myself.  And it will uncover bugs.  :)

Did you implement a C++ version? Or are you talking about the C parts
of the code.  It is a bit sad that we do not get the testing of the
C FE anymore which a self-hosting would have.

Martin
Jens Gustedt Aug. 8, 2024, 6:19 p.m. UTC | #29
Am 8. August 2024 19:21:23 MESZ schrieb David Brown <david.brown@hesbynett.no>:
> 
> 
> On 08/08/2024 11:13, Jens Gustedt wrote:
> > Hi
> > 
> > Am 8. August 2024 10:26:14 MESZ schrieb Alejandro Colomar <alx@kernel.org>:
> >> Hello Jens,
> >> 
> >> On Thu, Aug 08, 2024 at 07:35:12AM GMT, Jₑₙₛ Gustedt wrote:
> >>> Hello Alejandro,
> >>> 
> >>> On Thu, 8 Aug 2024 00:44:02 +0200, Alejandro Colomar wrote:
> >>> 
> >>>> +Its syntax is similar to @code{sizeof}.
> >>> 
> >>> For my curiosity, do you also make the same distinction that with
> >>> expressions you may omit the parenthesis?
> >> 
> >> I thought of it.  TBH, I haven't tested that thoroughly.
> >> 
> >> In principle, I have implemented it in the same way as sizeof, yes.
> >> 
> >> Personally, I would have never allowed sizeof without parentheses, but I
> >> understand there are people who think the parentheses hurt readability,
> >> so I kept it in the same way.
> >> 
> >> I'm not sure why the parentheses are necessary with type names in
> >> sizeof,
> > 
> > probably because of operator precedence. there would be no rule that tells us where sizeof ends and we'd switch back from parsing a type to parsing an expression
> > 
> 
> I personally have always found it looks odd that the sizeof operator does not always need parentheses - I suppose that is because it is a word, rather than punctuation.  To me, it looks more like a function or function-like macro.  And I'd view lengthof in the same light.  However, that's just personal opinion, not a rational argument!
> 
> > 
> >> but to maintain expectations, I think it would be better to do
> >> the same here.
> > 
> > Just to compare, the recent additions in C23 typeof etc. only have the parenthesized versions. So there would be precedent. And it really eases transition
> > 
> 
> _Alignof (now "alignof") from C11 always needs parentheses too - but it always applies to a type, not an expression.  (I think it should also be possible to use it with expressions for consistency, but that's another matter.)
> 
> As I see it, there is a good reason to say that a "lengthof" feature should always have parentheses.  With "typeof" (either as the gcc extension or the C23 feature), you can come a long way to the functionality of the proposed "lengthof" (or "__lengthof__") using a macro.  This will mean that if someone writes code using the new feature in gcc, and another person wants to compile the code with older gcc or a different compiler, they can use a macro (even "#define lengthof(arr) (sizeof(arr)/sizeof((arr)[0]))", which is less safe but works everywhere)
> instead.  But that is only true if the person writing the original "lengthof" code has included the parentheses.
> 
> > 
> >>> 
> >>> I wouldn't be sure that we should continue that distinction from
> >>> `sizeof`.
> >> 
> >> But then, what do we do?  Allow lengthof with type names without parens?
> >> Or require parens?  I'm not comfortable with that choice.
> >> 
> >>> Also that prefix variant would be difficult to wrap in a
> >>> `lengthof` macro (without underscores) as we would probably like to
> >>> have it in the end.
> >> 
> >> Do you mean that I should add _Lengthof?  We're adding __lengthof__ to
> >> be a GNU extension with relative freedom from ISO.  If I sent a patch
> >> adding _Lengthof, we'd have to send a proposal to ISO at the same time,
> >> and we'd be waiting for ISO to discuss it before I can merge it.  And we
> >> couldn't bring prior art to ISO.
> >> 
> >> With this approach instead, the plan is:
> >> 
> >> -  Merge __lengthof__ in GCC before ISO hears of it (well, there are
> >>     already several WG14 members in this discussion, so you have actually
> >>     heard of it, but we're free to do more or less what we want).
> >> 
> >> -  Propose _Lengthof to ISO C, with prior art in GCC as __lengthof__,
> >>     proposing the same semantics.  Also propose a lengthof macro defined
> >>     in <stdlength.h>
> > 
> > I don't really see why we should take a detour via _Lengthof, I would hope we could directly propose lengthof as the standardization
> > 
> 
> It is traditional for C.  It has taken until C23 to get alignof, bool, etc., as full keywords.  I would expect that we would have _Lengthof for a transitional period while "lengthof" is in "<stdlength.h>" and other uses of it are deprecated.  Changes in C happen slowly if backwards compatibility is threatened (too slowly for some people, too fast for others).

The reason is not to use user space identifiers. There __lengthof__ is already as good as the other weirdo, no reason to add that.

> 
> >> -  When ISO C accepts _Lengthof and lengthof, map _Lengthof in GCC to
> >>     the same internals as __lengthof__, so they are the same thing.
> >> 
> >> Still, I'm interested in having some feedback from WG14, to prevent
> >> implementing something that will have modifications when merged to
> >> ISO C, so please CC anyone interested from WG14, if you know of any.
> > 
> > I think that more important would be to have clang on board with this.
> > 
> > In any case, thanks for doing this!
> > 
> > Jens
> > 
> >
Alejandro Colomar Aug. 8, 2024, 6:30 p.m. UTC | #30
Hi Martin,

On Thu, Aug 08, 2024 at 08:16:50PM GMT, Martin Uecker wrote:
> > It will serve me as a huge test suite anyway; so it's worth it even if
> > just for myself.  And it will uncover bugs.  :)
> 
> Did you implement a C++ version? Or are you talking about the C parts
> of the code?

I'll start with C, but was considering trying to implement a C++
version.  But I think it's not worth it for me.  I don't like the
language at all, anyway.  :)

> It is a bit sad that we do not get the testing of the
> C FE anymore which a self-hosting would have.

Yup.  I wish GCC had not moved to C++.  Maybe improving the C language
helps that goal (move back to C) in the very long term.  :)

> Martin

Cheers,
Alex
Alejandro Colomar Aug. 8, 2024, 8:01 p.m. UTC | #31
Hi Joseph,

On Thu, Aug 08, 2024 at 05:31:05PM GMT, Joseph Myers wrote:
> Actual bugs should of course be fixed.

Here are the suspects:

./gcc/testsuite/gcc.target/powerpc/sse3-addsubps.c:80:
	  for (i = 0; i < sizeof (vals) / sizeof (vals); i += 8)

./gcc/c-family/c-pragma.cc:1811:
		    = sizeof (omp_pragmas_simd) / sizeof (*omp_pragmas);

The second sizeof expression seems to have a bogus operand, and the
correct one is probably the common one in sizeof divisions.

There are some others which are weird, but the result is correct.  For
example:

./libstdc++-v3/testsuite/21_strings/basic_string/cons/char/constexpr.cc:62:
	  const auto len = (sizeof(cs) - 1)/sizeof(C);
./libstdc++-v3/testsuite/21_strings/basic_string/cons/wchar_t/constexpr.cc:62:
	  const auto len = (sizeof(cs) - 1)/sizeof(C);

Which would read better as sizeof/sizeof - 1.

There are others which also could be improved in readability terms, but
I don't think it's worth bikeshedding that at the moment.  If you do a
global switch to ARRAY_SIZE(), I can help with those.  I'll keep the
commit in the backyard in case we need it for something.

It's:
 662 files changed, 1426 insertions(+), 1545 deletions(-)

Have a lovely night!
Alex
Joseph Myers Aug. 8, 2024, 8:36 p.m. UTC | #32
On Thu, 8 Aug 2024, Alejandro Colomar wrote:

> Here are the suspects:
> 
> ./gcc/testsuite/gcc.target/powerpc/sse3-addsubps.c:80:
> 	  for (i = 0; i < sizeof (vals) / sizeof (vals); i += 8)

The key question for testcases is *does the test actually test what was 
intended*?  We never want to change testcases simply for cleanness or 
consistency; being inconsistent is actively good in the testsuite, which 
should cover as wide a variety of weird code as possible.
Alejandro Colomar Aug. 8, 2024, 8:56 p.m. UTC | #33
On Thu, Aug 08, 2024 at 08:36:36PM GMT, Joseph Myers wrote:
> On Thu, 8 Aug 2024, Alejandro Colomar wrote:
> 
> > Here are the suspects:
> > 
> > ./gcc/testsuite/gcc.target/powerpc/sse3-addsubps.c:80:
> > 	  for (i = 0; i < sizeof (vals) / sizeof (vals); i += 8)
> 
> The key question for testcases is *does the test actually test what was 
> intended*?  We never want to change testcases simply for cleanness or 
> consistency; being inconsistent is actively good in the testsuite, which 
> should cover as wide a variety of weird code as possible.

I suspect it doesn't test what was intended.  sizeof(vals)/sizeof(vals)
is 1, which doesn't look like intended at all.  Also, there were related
tests that had the proper division.

> 
> -- 
> Joseph S. Myers
> josmyers@redhat.com
>
Jakub Jelinek Aug. 8, 2024, 10:43 p.m. UTC | #34
On Thu, Aug 08, 2024 at 10:01:14PM +0200, Alejandro Colomar wrote:
> Hi Joseph,
> 
> On Thu, Aug 08, 2024 at 05:31:05PM GMT, Joseph Myers wrote:
> > Actual bugs should of course be fixed.
> 
> Here are the suspects:
> 
> ./gcc/testsuite/gcc.target/powerpc/sse3-addsubps.c:80:
> 	  for (i = 0; i < sizeof (vals) / sizeof (vals); i += 8)
> 
> ./gcc/c-family/c-pragma.cc:1811:
> 		    = sizeof (omp_pragmas_simd) / sizeof (*omp_pragmas);
> 
> The second sizeof expression seems to have a bogus operand, and the
> correct one is probably the common one in sizeof divisions.

While the c-pragma one looks suspect and should be using ARRAY_SIZE (I'll
post a patch), it actually works correctly.
Both omp_pragmas and omp_pragmas_simd are arrays of the same structure, so
whether it divides by sizeof (*omp_pragmas) or sizeof (*omp_pragmas_simd)
or sizeof (omp_pragmas_simd[0]) doesn't matter.

	Jakub
Jakub Jelinek Aug. 8, 2024, 10:48 p.m. UTC | #35
On Thu, Aug 08, 2024 at 10:01:14PM +0200, Alejandro Colomar wrote:
> ./libstdc++-v3/testsuite/21_strings/basic_string/cons/char/constexpr.cc:62:
> 	  const auto len = (sizeof(cs) - 1)/sizeof(C);
> ./libstdc++-v3/testsuite/21_strings/basic_string/cons/wchar_t/constexpr.cc:62:
> 	  const auto len = (sizeof(cs) - 1)/sizeof(C);
> 
> Which would read better as sizeof/sizeof - 1.
> 
> There are others which also could be improved in readability terms, but
> I don't think it's worth bikeshedding that at the moment.  If you do a
> global switch to ARRAY_SIZE(), I can help with those.  I'll keep the
> commit in the backyard in case we need it for something.

E.g. in testcases, we definitely don't want to use ARRAY_SIZE, at least most
of them should be self-contained if possible, include as few headers as
possible etc.

	Jakub
diff mbox series

Patch

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index e7e371fd26f..9f5feb83345 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -465,6 +465,7 @@  const struct c_common_resword c_common_reswords[] =
   { "__inline",		RID_INLINE,	0 },
   { "__inline__",	RID_INLINE,	0 },
   { "__label__",	RID_LABEL,	0 },
+  { "__lengthof__",	RID_LENGTHOF, 0 },
   { "__null",		RID_NULL,	0 },
   { "__real",		RID_REALPART,	0 },
   { "__real__",		RID_REALPART,	0 },
@@ -4070,6 +4071,31 @@  c_alignof_expr (location_t loc, tree expr)
 
   return fold_convert_loc (loc, size_type_node, t);
 }
+
+/* Implement the lengthof keyword: Return the length of an array,
+   that is, the number of elements in the array.  */
+
+tree
+c_lengthof_type (location_t loc, tree type)
+{
+  enum tree_code type_code;
+
+  type_code = TREE_CODE (type);
+  if (type_code != ARRAY_TYPE)
+    {
+      error_at (loc, "invalid application of %<lengthof%> to type %qT", type);
+      return error_mark_node;
+    }
+  if (!COMPLETE_TYPE_P (type))
+    {
+      error_at (loc,
+		"invalid application of %<lengthof%> to incomplete type %qT",
+		type);
+      return error_mark_node;
+    }
+
+  return array_type_nelts_top (type);
+}
 
 /* Handle C and C++ default attributes.  */
 
diff --git a/gcc/c-family/c-common.def b/gcc/c-family/c-common.def
index 5de96e5d4a8..6d162f67104 100644
--- a/gcc/c-family/c-common.def
+++ b/gcc/c-family/c-common.def
@@ -50,6 +50,9 @@  DEFTREECODE (EXCESS_PRECISION_EXPR, "excess_precision_expr", tcc_expression, 1)
    number.  */
 DEFTREECODE (USERDEF_LITERAL, "userdef_literal", tcc_exceptional, 3)
 
+/* Represents a 'lengthof' expression.  */
+DEFTREECODE (LENGTHOF_EXPR, "lengthof_expr", tcc_expression, 1)
+
 /* Represents a 'sizeof' expression during C++ template expansion,
    or for the purpose of -Wsizeof-pointer-memaccess warning.  */
 DEFTREECODE (SIZEOF_EXPR, "sizeof_expr", tcc_expression, 1)
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index ccaea27c2b9..f815a4cf3bc 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -105,6 +105,7 @@  enum rid
 
   /* C extensions */
   RID_ASM,       RID_TYPEOF,   RID_TYPEOF_UNQUAL, RID_ALIGNOF,  RID_ATTRIBUTE,
+  RID_LENGTHOF,
   RID_VA_ARG,
   RID_EXTENSION, RID_IMAGPART, RID_REALPART, RID_LABEL,    RID_CHOOSE_EXPR,
   RID_TYPES_COMPATIBLE_P,      RID_BUILTIN_COMPLEX,	   RID_BUILTIN_SHUFFLE,
@@ -885,6 +886,7 @@  extern tree c_common_truthvalue_conversion (location_t, tree);
 extern void c_apply_type_quals_to_decl (int, tree);
 extern tree c_sizeof_or_alignof_type (location_t, tree, bool, bool, int);
 extern tree c_alignof_expr (location_t, tree);
+extern tree c_lengthof_type (location_t, tree);
 /* Print an error message for invalid operands to arith operation CODE.
    NOP_EXPR is used as a special case (see truthvalue_conversion).  */
 extern void binary_op_error (rich_location *, enum tree_code, tree, tree);
diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 4dced430d1f..790c58b2558 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -8937,12 +8937,16 @@  start_struct (location_t loc, enum tree_code code, tree name,
      within a statement expr used within sizeof, et. al.  This is not
      terribly serious as C++ doesn't permit statement exprs within
      sizeof anyhow.  */
-  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof))
+  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof || in_lengthof))
     warning_at (loc, OPT_Wc___compat,
 		"defining type in %qs expression is invalid in C++",
 		(in_sizeof
 		 ? "sizeof"
-		 : (in_typeof ? "typeof" : "alignof")));
+		 : (in_typeof
+		    ? "typeof"
+		    : (in_alignof
+		       ? "alignof"
+		       : "lengthof"))));
 
   if (in_underspecified_init)
     error_at (loc, "%qT defined in underspecified object initializer", ref);
@@ -9897,7 +9901,7 @@  finish_struct (location_t loc, tree t, tree fieldlist, tree attributes,
 	 struct_types.  */
       if (warn_cxx_compat
 	  && struct_parse_info != NULL
-	  && !in_sizeof && !in_typeof && !in_alignof)
+	  && !in_sizeof && !in_typeof && !in_alignof && !in_lengthof)
 	struct_parse_info->struct_types.safe_push (t);
      }
 
@@ -10071,12 +10075,16 @@  start_enum (location_t loc, struct c_enum_contents *the_enum, tree name,
   /* FIXME: This will issue a warning for a use of a type defined
      within sizeof in a statement expr.  This is not terribly serious
      as C++ doesn't permit statement exprs within sizeof anyhow.  */
-  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof))
+  if (warn_cxx_compat && (in_sizeof || in_typeof || in_alignof || in_lengthof))
     warning_at (loc, OPT_Wc___compat,
 		"defining type in %qs expression is invalid in C++",
 		(in_sizeof
 		 ? "sizeof"
-		 : (in_typeof ? "typeof" : "alignof")));
+		 : (in_typeof
+		    ? "typeof"
+		    : (in_alignof
+		       ? "alignof"
+		       : "lengthof"))));
 
   if (in_underspecified_init)
     error_at (loc, "%qT defined in underspecified object initializer",
@@ -10270,7 +10278,7 @@  finish_enum (tree enumtype, tree values, tree attributes)
      struct_types.  */
   if (warn_cxx_compat
       && struct_parse_info != NULL
-      && !in_sizeof && !in_typeof && !in_alignof)
+      && !in_sizeof && !in_typeof && !in_alignof && !in_lengthof)
     struct_parse_info->struct_types.safe_push (enumtype);
 
   /* Check for consistency with previous definition */
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 12c5ed5d92c..09bb19f9299 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -74,7 +74,17 @@  along with GCC; see the file COPYING3.  If not see
 #include "bitmap.h"
 #include "analyzer/analyzer-language.h"
 #include "toplev.h"
+
+#define c_parser_sizeof_expression(parser)                                    \
+(                                                                             \
+  c_parser_sizeof_or_lengthof_expression (parser, RID_SIZEOF)                 \
+)
 
+#define c_parser_lengthof_expression(parser)                                  \
+(                                                                             \
+  c_parser_sizeof_or_lengthof_expression (parser, RID_LENGTHOF)               \
+)
+
 /* We need to walk over decls with incomplete struct/union/enum types
    after parsing the whole translation unit.
    In finish_decl(), if the decl is static, has incomplete
@@ -1687,7 +1697,7 @@  static struct c_expr c_parser_binary_expression (c_parser *, struct c_expr *,
 						 tree);
 static struct c_expr c_parser_cast_expression (c_parser *, struct c_expr *);
 static struct c_expr c_parser_unary_expression (c_parser *);
-static struct c_expr c_parser_sizeof_expression (c_parser *);
+static struct c_expr c_parser_sizeof_or_lengthof_expression (c_parser *, enum rid);
 static struct c_expr c_parser_alignof_expression (c_parser *);
 static struct c_expr c_parser_postfix_expression (c_parser *);
 static struct c_expr c_parser_postfix_expression_after_paren_type (c_parser *,
@@ -9864,6 +9874,8 @@  c_parser_unary_expression (c_parser *parser)
     case CPP_KEYWORD:
       switch (c_parser_peek_token (parser)->keyword)
 	{
+	case RID_LENGTHOF:
+	  return c_parser_lengthof_expression (parser);
 	case RID_SIZEOF:
 	  return c_parser_sizeof_expression (parser);
 	case RID_ALIGNOF:
@@ -9903,12 +9915,13 @@  c_parser_unary_expression (c_parser *parser)
 /* Parse a sizeof expression.  */
 
 static struct c_expr
-c_parser_sizeof_expression (c_parser *parser)
+c_parser_sizeof_or_lengthof_expression (c_parser *parser, enum rid rid)
 {
+  const char *op_name = (rid == RID_LENGTHOF) ? "lengthof" : "sizeof";
   struct c_expr expr;
   struct c_expr result;
   location_t expr_loc;
-  gcc_assert (c_parser_next_token_is_keyword (parser, RID_SIZEOF));
+  gcc_assert (c_parser_next_token_is_keyword (parser, rid));
 
   location_t start;
   location_t finish = UNKNOWN_LOCATION;
@@ -9917,7 +9930,10 @@  c_parser_sizeof_expression (c_parser *parser)
 
   c_parser_consume_token (parser);
   c_inhibit_evaluation_warnings++;
-  in_sizeof++;
+  if (rid == RID_LENGTHOF)
+    in_lengthof++;
+  else
+    in_sizeof++;
   if (c_parser_next_token_is (parser, CPP_OPEN_PAREN)
       && c_token_starts_compound_literal (c_parser_peek_2nd_token (parser)))
     {
@@ -9936,7 +9952,10 @@  c_parser_sizeof_expression (c_parser *parser)
 	{
 	  struct c_expr ret;
 	  c_inhibit_evaluation_warnings--;
-	  in_sizeof--;
+	  if (rid == RID_LENGTHOF)
+	    in_lengthof--;
+	  else
+	    in_sizeof--;
 	  ret.set_error ();
 	  ret.original_code = ERROR_MARK;
 	  ret.original_type = NULL;
@@ -9948,31 +9967,45 @@  c_parser_sizeof_expression (c_parser *parser)
 							       type_name,
 							       expr_loc);
 	  finish = expr.get_finish ();
-	  goto sizeof_expr;
+	  goto Xof_expr;
 	}
       /* sizeof ( type-name ).  */
       if (scspecs)
-	error_at (expr_loc, "storage class specifier in %<sizeof%>");
+	error_at (expr_loc, "storage class specifier in %qs", op_name);
       if (type_name->specs->alignas_p)
 	error_at (type_name->specs->locations[cdw_alignas],
-		  "alignment specified for type name in %<sizeof%>");
+		  "alignment specified for type name in %qs", op_name);
       c_inhibit_evaluation_warnings--;
-      in_sizeof--;
-      result = c_expr_sizeof_type (expr_loc, type_name);
+      if (rid == RID_LENGTHOF)
+	{
+	  in_lengthof--;
+	  result = c_expr_lengthof_type (expr_loc, type_name);
+	}
+      else
+	{
+	  in_sizeof--;
+	  result = c_expr_sizeof_type (expr_loc, type_name);
+	}
     }
   else
     {
       expr_loc = c_parser_peek_token (parser)->location;
       expr = c_parser_unary_expression (parser);
       finish = expr.get_finish ();
-    sizeof_expr:
+    Xof_expr:
       c_inhibit_evaluation_warnings--;
-      in_sizeof--;
+      if (rid == RID_LENGTHOF)
+	in_lengthof--;
+      else
+	in_sizeof--;
       mark_exp_read (expr.value);
       if (TREE_CODE (expr.value) == COMPONENT_REF
 	  && DECL_C_BIT_FIELD (TREE_OPERAND (expr.value, 1)))
-	error_at (expr_loc, "%<sizeof%> applied to a bit-field");
-      result = c_expr_sizeof_expr (expr_loc, expr);
+	error_at (expr_loc, "%qs applied to a bit-field", op_name);
+      if (rid == RID_LENGTHOF)
+	result = c_expr_lengthof_expr (expr_loc, expr);
+      else
+	result = c_expr_sizeof_expr (expr_loc, expr);
     }
   if (finish == UNKNOWN_LOCATION)
     finish = start;
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index 15da875a029..102fcfefea6 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -736,6 +736,7 @@  extern int c_type_dwarf_attribute (const_tree, int);
 /* in c-typeck.cc */
 extern int in_alignof;
 extern int in_sizeof;
+extern int in_lengthof;
 extern int in_typeof;
 extern bool c_in_omp_for;
 extern bool c_omp_array_section_p;
@@ -786,6 +787,9 @@  extern tree build_external_ref (location_t, tree, bool, tree *);
 extern void pop_maybe_used (bool);
 extern struct c_expr c_expr_sizeof_expr (location_t, struct c_expr);
 extern struct c_expr c_expr_sizeof_type (location_t, struct c_type_name *);
+extern struct c_expr c_expr_lengthof_expr (location_t, struct c_expr);
+extern struct c_expr c_expr_lengthof_type (location_t loc,
+                                           struct c_type_name *);
 extern struct c_expr parser_build_unary_op (location_t, enum tree_code,
     					    struct c_expr);
 extern struct c_expr parser_build_binary_op (location_t,
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 7e0f01ed22b..98e8d31cb3b 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -71,6 +71,9 @@  int in_alignof;
 /* The level of nesting inside "sizeof".  */
 int in_sizeof;
 
+/* The level of nesting inside "lengthof".  */
+int in_lengthof;
+
 /* The level of nesting inside "typeof".  */
 int in_typeof;
 
@@ -3255,7 +3258,7 @@  build_external_ref (location_t loc, tree id, bool fun, tree *type)
 
   if (TREE_CODE (ref) == FUNCTION_DECL && !in_alignof)
     {
-      if (!in_sizeof && !in_typeof)
+      if (!in_sizeof && !in_typeof && !in_lengthof)
 	C_DECL_USED (ref) = 1;
       else if (DECL_INITIAL (ref) == NULL_TREE
 	       && DECL_EXTERNAL (ref)
@@ -3311,7 +3314,7 @@  struct maybe_used_decl
 {
   /* The decl.  */
   tree decl;
-  /* The level seen at (in_sizeof + in_typeof).  */
+  /* The level seen at (in_sizeof + in_typeof + in_lengthof).  */
   int level;
   /* The next one at this level or above, or NULL.  */
   struct maybe_used_decl *next;
@@ -3329,7 +3332,7 @@  record_maybe_used_decl (tree decl)
 {
   struct maybe_used_decl *t = XOBNEW (&parser_obstack, struct maybe_used_decl);
   t->decl = decl;
-  t->level = in_sizeof + in_typeof;
+  t->level = in_sizeof + in_typeof + in_lengthof;
   t->next = maybe_used_decls;
   maybe_used_decls = t;
 }
@@ -3343,7 +3346,7 @@  void
 pop_maybe_used (bool used)
 {
   struct maybe_used_decl *p = maybe_used_decls;
-  int cur_level = in_sizeof + in_typeof;
+  int cur_level = in_sizeof + in_typeof + in_lengthof;
   while (p && p->level > cur_level)
     {
       if (used)
@@ -3453,6 +3456,109 @@  c_expr_sizeof_type (location_t loc, struct c_type_name *t)
   return ret;
 }
 
+static bool
+is_top_array_vla (tree type)
+{
+  bool zero, var;
+  tree d;
+
+  if (TREE_CODE (type) != ARRAY_TYPE)
+    return false;
+  if (!COMPLETE_TYPE_P (type))
+    return false;
+
+  d = TYPE_DOMAIN (type);
+  zero = !TYPE_MAX_VALUE (d);
+  var = (!zero
+	 && (TREE_CODE (TYPE_MIN_VALUE (d)) != INTEGER_CST
+	     || TREE_CODE (TYPE_MAX_VALUE (d)) != INTEGER_CST));
+  var = var || (zero && C_TYPE_VARIABLE_SIZE (type));
+  return var;
+}
+
+/* Return the result of lengthof applied to EXPR.  */
+
+struct c_expr
+c_expr_lengthof_expr (location_t loc, struct c_expr expr)
+{
+  struct c_expr ret;
+  if (expr.value == error_mark_node)
+    {
+      ret.value = error_mark_node;
+      ret.original_code = ERROR_MARK;
+      ret.original_type = NULL;
+      ret.m_decimal = 0;
+      pop_maybe_used (false);
+    }
+  else
+    {
+      bool expr_const_operands = true;
+
+      tree folded_expr = c_fully_fold (expr.value, require_constant_value,
+				       &expr_const_operands);
+      ret.value = c_lengthof_type (loc, TREE_TYPE (folded_expr));
+      c_last_sizeof_arg = expr.value;
+      c_last_sizeof_loc = loc;
+      ret.original_code = LENGTHOF_EXPR;
+      ret.original_type = NULL;
+      ret.m_decimal = 0;
+      if (is_top_array_vla (TREE_TYPE (folded_expr)))
+	{
+	  /* lengthof is evaluated when given a vla.  */
+	  ret.value = build2 (C_MAYBE_CONST_EXPR, TREE_TYPE (ret.value),
+			      folded_expr, ret.value);
+	  C_MAYBE_CONST_EXPR_NON_CONST (ret.value) = !expr_const_operands;
+	  SET_EXPR_LOCATION (ret.value, loc);
+	}
+      pop_maybe_used (is_top_array_vla (TREE_TYPE (folded_expr)));
+    }
+  return ret;
+}
+
+/* Return the result of lengthof applied to T, a structure for the type
+   name passed to __lengthof__ (rather than the type itself).  LOC is the
+   location of the original expression.  */
+
+struct c_expr
+c_expr_lengthof_type (location_t loc, struct c_type_name *t)
+{
+  tree type;
+  struct c_expr ret;
+  tree type_expr = NULL_TREE;
+  bool type_expr_const = true;
+  type = groktypename (t, &type_expr, &type_expr_const);
+  ret.value = c_lengthof_type (loc, type);
+  c_last_sizeof_arg = type;
+  c_last_sizeof_loc = loc;
+  ret.original_code = LENGTHOF_EXPR;
+  ret.original_type = NULL;
+  ret.m_decimal = 0;
+  if (type == error_mark_node)
+    {
+      ret.value = error_mark_node;
+      ret.original_code = ERROR_MARK;
+    }
+  else
+  if ((type_expr || TREE_CODE (ret.value) == INTEGER_CST)
+      && is_top_array_vla (type))
+    {
+      /* If the type is a [*] array, it is a VLA but is represented as
+	 having a size of zero.  In such a case we must ensure that
+	 the result of lengthof does not get folded to a constant by
+	 c_fully_fold, because if the length is evaluated the result is
+	 not constant and so constraints on zero or negative size
+	 arrays must not be applied when this lengthof call is inside
+	 another array declarator.  */
+      if (!type_expr)
+	type_expr = integer_zero_node;
+      ret.value = build2 (C_MAYBE_CONST_EXPR, TREE_TYPE (ret.value),
+			  type_expr, ret.value);
+      C_MAYBE_CONST_EXPR_NON_CONST (ret.value) = !type_expr_const;
+    }
+  pop_maybe_used (type != error_mark_node ? is_top_array_vla (type) : false);
+  return ret;
+}
+
 /* Build a function call to function FUNCTION with parameters PARAMS.
    The function call is at LOC.
    PARAMS is a list--a chain of TREE_LIST nodes--in which the
diff --git a/gcc/cp/operators.def b/gcc/cp/operators.def
index d8878923602..d640ed8bd91 100644
--- a/gcc/cp/operators.def
+++ b/gcc/cp/operators.def
@@ -91,6 +91,7 @@  DEF_OPERATOR ("co_await", CO_AWAIT_EXPR, "aw", OVL_OP_FLAG_UNARY)
 
 /* These are extensions.  */
 DEF_OPERATOR ("alignof", ALIGNOF_EXPR, "az", OVL_OP_FLAG_UNARY)
+DEF_OPERATOR ("__lengthof__", LENGTHOF_EXPR, "lz", OVL_OP_FLAG_UNARY)
 DEF_OPERATOR ("__imag__", IMAGPART_EXPR, "v18__imag__", OVL_OP_FLAG_UNARY)
 DEF_OPERATOR ("__real__", REALPART_EXPR, "v18__real__", OVL_OP_FLAG_UNARY)
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 0b572afca72..21608eb43a6 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10391,6 +10391,29 @@  If the operand of the @code{__alignof__} expression is a function,
 the expression evaluates to the alignment of the function which may
 be specified by attribute @code{aligned} (@pxref{Common Function Attributes}).
 
+@node Length
+@section Determining the Length of Arrays
+@cindex lengthof
+@cindex length
+@cindex array length
+
+The keyword @code{__lengthof__} determines the length of an array operand,
+that is, the number of elements in the array.
+Its syntax is just like @code{sizeof}.
+The operand must be a complete array type.
+The operand is not evaluated
+if the top-level length designator is an integer constant expression
+(in this case, the operator results in a constant expression);
+and it is evaluated
+if the top-level length designator is not an integer constant expression
+(in this case, the operator results in a run-time value).
+For example:
+
+@smallexample
+__lengthof__ (int [7][n++]);  // constexpr
+__lengthof__ (int [n++][7]);  // run-time value
+@end smallexample
+
 @node Inline
 @section An Inline Function is As Fast As a Macro
 @cindex inline functions
diff --git a/gcc/target.h b/gcc/target.h
index c1f99b97b86..79890ae9944 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -245,6 +245,9 @@  enum type_context_kind {
   /* Directly measuring the alignment of T.  */
   TCTX_ALIGNOF,
 
+  /* Directly measuring the length of array T.  */
+  TCTX_LENGTHOF,
+
   /* Creating objects of type T with static storage duration.  */
   TCTX_STATIC_STORAGE,
 
diff --git a/gcc/testsuite/gcc.dg/lengthof-compile.c b/gcc/testsuite/gcc.dg/lengthof-compile.c
new file mode 100644
index 00000000000..6b44704ca7e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lengthof-compile.c
@@ -0,0 +1,49 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Wno-declaration-after-statement -Wno-pedantic -Wno-vla" } */
+
+extern int x[];
+
+void
+incomplete (int p[])
+{
+  unsigned n;
+
+  n = __lengthof__ (x);  /* { dg-error "incomplete" } */
+
+  /* We want to support the following one in the future,
+     but for now it should fail.  */
+  n = __lengthof__ (p);  /* { dg-error "invalid" } */
+}
+
+void
+fam (void)
+{
+  struct {
+    int x;
+    int fam[];
+  } s;
+  unsigned n;
+
+  n = __lengthof__ (s.fam); /* { dg-error "incomplete" } */
+}
+
+void fix_fix (int i, char (*a)[3][5], int (*x)[__lengthof__ (*a)]);
+void fix_var (int i, char (*a)[3][i], int (*x)[__lengthof__ (*a)]);
+void fix_uns (int i, char (*a)[3][*], int (*x)[__lengthof__ (*a)]);
+
+void
+func (void)
+{
+  int  i3[3];
+  int  i5[5];
+  char c35[3][5];
+
+  fix_fix (5, &c35, &i3);
+  fix_fix (5, &c35, &i5); /* { dg-error "incompatible-pointer-types" } */
+
+  fix_var (5, &c35, &i3);
+  fix_var (5, &c35, &i5); /* { dg-error "incompatible-pointer-types" } */
+
+  fix_uns (5, &c35, &i3);
+  fix_uns (5, &c35, &i5); /* { dg-error "incompatible-pointer-types" } */
+}
diff --git a/gcc/testsuite/gcc.dg/lengthof.c b/gcc/testsuite/gcc.dg/lengthof.c
new file mode 100644
index 00000000000..38da5df52a5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lengthof.c
@@ -0,0 +1,127 @@ 
+/* { dg-do run } */
+/* { dg-options "-Wno-declaration-after-statement -Wno-pedantic -Wno-vla" } */
+
+#undef NDEBUG
+#include <assert.h>
+
+void
+array (void)
+{
+  short a[7];
+
+  assert (__lengthof__ (a) == 7);
+  assert (__lengthof__ (long [0]) == 0);
+  assert (__lengthof__ (unsigned [99]) == 99);
+}
+
+void
+vla (void)
+{
+  unsigned n;
+
+  n = 99;
+  assert (__lengthof__ (short [n - 10]) == 99 - 10);
+
+  int v[n / 2];
+  assert (__lengthof__ (v) == 99 / 2);
+
+  n = 0;
+  int z[n];
+  assert (__lengthof__ (z) == 0);
+}
+
+void
+member (void)
+{
+  struct {
+    int a[8];
+  } s;
+
+  assert (__lengthof__ (s.a) == 8);
+}
+
+void
+vla_eval (void)
+{
+  int i;
+
+  i = 7;
+  assert (__lengthof__ (struct {int x;}[i++]) == 7);
+  assert (i == 7 + 1);
+
+  int v[i];
+  int (*p)[i];
+  p = &v;
+  assert (__lengthof__ (*p++) == i);
+  assert (p - 1 == &v);
+}
+
+void
+inner_vla_noeval (void)
+{
+  int i;
+
+  i = 3;
+  assert (__lengthof__ (struct {int x[i++];}[3]) == 3);
+  assert (i == 3);
+}
+
+void
+array_noeval (void)
+{
+  long a[5];
+  long (*p)[__lengthof__ (a)];
+
+  p = &a;
+  assert (__lengthof__ (*p++) == 5);
+  assert (p == &a);
+}
+
+void
+matrix_zero (void)
+{
+  int i;
+
+  assert (__lengthof__ (int [0][4]) == 0);
+  i = 3;
+  assert (__lengthof__ (int [0][i]) == 0);
+}
+
+void
+matrix_fixed (void)
+{
+  int i;
+
+  assert (__lengthof__ (int [7][4]) == 7);
+  i = 3;
+  assert (__lengthof__ (int [7][i]) == 7);
+}
+
+void
+matrix_vla (void)
+{
+  int i, j;
+
+  i = 7;
+  assert (__lengthof__ (int [i++][4]) == 7);
+  assert (i == 7 + 1);
+
+  i = 9;
+  j = 3;
+  assert (__lengthof__ (int [i++][j]) == 9);
+  assert (i == 9 + 1);
+}
+
+int
+main (void)
+{
+  array ();
+  vla ();
+  member ();
+  vla_eval ();
+  inner_vla_noeval ();
+  array_noeval ();
+  matrix_zero ();
+  matrix_fixed ();
+  matrix_vla ();
+}