diff mbox series

c++: fix wrong ambiguity resolution [PR29834]

Message ID 20240720183125.26575-1-polacek@redhat.com
State New
Headers show
Series c++: fix wrong ambiguity resolution [PR29834] | expand

Commit Message

Marek Polacek July 20, 2024, 6:31 p.m. UTC
[ Entering the contest to fix the oldest PR in this cycle. ]

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This 18-year-old PR reports that we parse certain comma expressions
as a declaration rather than statement when the statement begins with
a functional-style cast expression.  Consider

  int(x), 0;

which does not declare x--it only casts x to int--, whereas

  int(x), (y);

declares x and y.  We need some kind of look-ahead to decide how we
should disambiguate the construct, because cp_parser_init_declarator
commits eagerly once it has seen "int(x)", and then it's too late to
recover.

This patch makes us try to parse the code as a sequence of declarators;
if that fails, we are likely looking at a statement.  That's a simple
idea, but it's complicated by code like

  void (*p)(void *)(fun);

which initializes a pointer-to-function, or

  int(x), (x) + 1;

which is an expression statement, but the second (x) is parsed as
a valid declarator, only the + after reveals that the whole thing
is an expression.  You can have things like

  int(**p)

which by itself doesn't tell you much.  You can have

  int(*q)(void*)

which looks like it starts with a functional-style cast, but it is not
a cast.  The simple

  int(x) = 42;

has an initializer so it declares x; it is not an assignment.  But then,

    int(d) __attribute__(());

does not have an initializer, but the attribute makes it a declaration.

	PR c++/29834
	PR c++/54905

gcc/cp/ChangeLog:

	* parser.cc (cp_parser_lambda_introducer): Use
	cp_parser_next_token_starts_initializer_p.
	(cp_parser_simple_declaration): Add look-ahead to decide if we're
	looking at a declaration or statement.
	(cp_parser_next_token_starts_initializer_p): New.

gcc/testsuite/ChangeLog:

	* g++.dg/parse/ambig15.C: New test.
	* g++.dg/parse/ambig16.C: New test.
---
 gcc/cp/parser.cc                     | 73 ++++++++++++++++++++++--
 gcc/testsuite/g++.dg/parse/ambig15.C | 83 ++++++++++++++++++++++++++++
 gcc/testsuite/g++.dg/parse/ambig16.C | 18 ++++++
 3 files changed, 168 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/parse/ambig15.C
 create mode 100644 gcc/testsuite/g++.dg/parse/ambig16.C


base-commit: 493c55578fe00f5f4a7534b8f5cb5213f86f4d01

Comments

Jason Merrill July 23, 2024, 4:53 a.m. UTC | #1
On 7/20/24 2:31 PM, Marek Polacek wrote:
> [ Entering the contest to fix the oldest PR in this cycle. ]
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> 
> -- >8 --
> This 18-year-old PR reports that we parse certain comma expressions
> as a declaration rather than statement when the statement begins with
> a functional-style cast expression.  Consider
> 
>    int(x), 0;
> 
> which does not declare x--it only casts x to int--, whereas
> 
>    int(x), (y);
> 
> declares x and y.  We need some kind of look-ahead to decide how we
> should disambiguate the construct, because cp_parser_init_declarator
> commits eagerly once it has seen "int(x)", and then it's too late to
> recover.
> 
> This patch makes us try to parse the code as a sequence of declarators;
> if that fails, we are likely looking at a statement.  That's a simple
> idea, but it's complicated by code like
> 
>    void (*p)(void *)(fun);
> 
> which initializes a pointer-to-function, or
> 
>    int(x), (x) + 1;
> 
> which is an expression statement, but the second (x) is parsed as
> a valid declarator, only the + after reveals that the whole thing
> is an expression.  You can have things like
> 
>    int(**p)
> 
> which by itself doesn't tell you much.  You can have
> 
>    int(*q)(void*)
> 
> which looks like it starts with a functional-style cast, but it is not
> a cast.  The simple
> 
>    int(x) = 42;
> 
> has an initializer so it declares x; it is not an assignment.  But then,
> 
>      int(d) __attribute__(());
> 
> does not have an initializer, but the attribute makes it a declaration.
> 
> 	PR c++/29834
> 	PR c++/54905
> 
> gcc/cp/ChangeLog:
> 
> 	* parser.cc (cp_parser_lambda_introducer): Use
> 	cp_parser_next_token_starts_initializer_p.
> 	(cp_parser_simple_declaration): Add look-ahead to decide if we're
> 	looking at a declaration or statement.
> 	(cp_parser_next_token_starts_initializer_p): New.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* g++.dg/parse/ambig15.C: New test.
> 	* g++.dg/parse/ambig16.C: New test.
> ---
>   gcc/cp/parser.cc                     | 73 ++++++++++++++++++++++--
>   gcc/testsuite/g++.dg/parse/ambig15.C | 83 ++++++++++++++++++++++++++++
>   gcc/testsuite/g++.dg/parse/ambig16.C | 18 ++++++
>   3 files changed, 168 insertions(+), 6 deletions(-)
>   create mode 100644 gcc/testsuite/g++.dg/parse/ambig15.C
>   create mode 100644 gcc/testsuite/g++.dg/parse/ambig16.C
> 
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index 1fa0780944b..797cfc3204e 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -2947,6 +2947,8 @@ static bool cp_parser_next_token_ends_template_argument_p
>     (cp_parser *);
>   static bool cp_parser_nth_token_starts_template_argument_list_p
>     (cp_parser *, size_t);
> +static bool cp_parser_next_token_starts_initializer_p
> +  (cp_parser *);
>   static enum tag_types cp_parser_token_is_class_key
>     (cp_token *);
>   static enum tag_types cp_parser_token_is_type_parameter_key
> @@ -11663,9 +11665,7 @@ cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
>   	}
>   
>         /* Find the initializer for this capture.  */
> -      if (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
> -	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
> -	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
> +      if (cp_parser_next_token_starts_initializer_p (parser))
>   	{
>   	  /* An explicit initializer exists.  */
>   	  if (cxx_dialect < cxx14)
> @@ -11747,9 +11747,7 @@ cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
>   		  /* If what follows is an initializer, the second '...' is
>   		     invalid.  But for cases like [...xs...], the first one
>   		     is invalid.  */
> -		  if (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
> -		      || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
> -		      || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
> +		  if (cp_parser_next_token_starts_initializer_p (parser))
>   		    ellipsis_loc = loc;
>   		  error_at (ellipsis_loc, "too many %<...%> in lambda capture");
>   		  continue;
> @@ -16047,6 +16045,58 @@ cp_parser_simple_declaration (cp_parser* parser,
>       else
>         break;
>   
> +  /* If we are still uncommitted, we're probably looking at something like
> +     T(x), which can be a declaration but does not have to be, depending
> +     on what comes after.  Consider
> +       int(x), 0;
> +     which is _not_ a declaration of x, it's a functional cast, and
> +       int(x), (y);
> +     which declares x and y.  We need some kind of look-ahead to decide,
> +     cp_parser_init_declarator below will commit eagerly once it has seen
> +     "int(x)".  So we try to parse this as a sequence of declarators; if
> +     that fails, we are likely looking at a statement.  (We could avoid
> +     all of this if there is no non-nested comma.)  */

Unfortunately, this seems to break

int main()
{
   int(x), y[sizeof(x)];
}

Jason
Marek Polacek July 23, 2024, 8:18 p.m. UTC | #2
On Tue, Jul 23, 2024 at 12:53:07AM -0400, Jason Merrill wrote:
> On 7/20/24 2:31 PM, Marek Polacek wrote:
> > [ Entering the contest to fix the oldest PR in this cycle. ]
> > 
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > -- >8 --
> > This 18-year-old PR reports that we parse certain comma expressions
> > as a declaration rather than statement when the statement begins with
> > a functional-style cast expression.  Consider
> > 
> >    int(x), 0;
> > 
> > which does not declare x--it only casts x to int--, whereas
> > 
> >    int(x), (y);
> > 
> > declares x and y.  We need some kind of look-ahead to decide how we
> > should disambiguate the construct, because cp_parser_init_declarator
> > commits eagerly once it has seen "int(x)", and then it's too late to
> > recover.
> > 
> > This patch makes us try to parse the code as a sequence of declarators;
> > if that fails, we are likely looking at a statement.  That's a simple
> > idea, but it's complicated by code like
> > 
> >    void (*p)(void *)(fun);
> > 
> > which initializes a pointer-to-function, or
> > 
> >    int(x), (x) + 1;
> > 
> > which is an expression statement, but the second (x) is parsed as
> > a valid declarator, only the + after reveals that the whole thing
> > is an expression.  You can have things like
> > 
> >    int(**p)
> > 
> > which by itself doesn't tell you much.  You can have
> > 
> >    int(*q)(void*)
> > 
> > which looks like it starts with a functional-style cast, but it is not
> > a cast.  The simple
> > 
> >    int(x) = 42;
> > 
> > has an initializer so it declares x; it is not an assignment.  But then,
> > 
> >      int(d) __attribute__(());
> > 
> > does not have an initializer, but the attribute makes it a declaration.
> > 
> > 	PR c++/29834
> > 	PR c++/54905
> > 
> > gcc/cp/ChangeLog:
> > 
> > 	* parser.cc (cp_parser_lambda_introducer): Use
> > 	cp_parser_next_token_starts_initializer_p.
> > 	(cp_parser_simple_declaration): Add look-ahead to decide if we're
> > 	looking at a declaration or statement.
> > 	(cp_parser_next_token_starts_initializer_p): New.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > 	* g++.dg/parse/ambig15.C: New test.
> > 	* g++.dg/parse/ambig16.C: New test.
> > ---
> >   gcc/cp/parser.cc                     | 73 ++++++++++++++++++++++--
> >   gcc/testsuite/g++.dg/parse/ambig15.C | 83 ++++++++++++++++++++++++++++
> >   gcc/testsuite/g++.dg/parse/ambig16.C | 18 ++++++
> >   3 files changed, 168 insertions(+), 6 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/parse/ambig15.C
> >   create mode 100644 gcc/testsuite/g++.dg/parse/ambig16.C
> > 
> > diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> > index 1fa0780944b..797cfc3204e 100644
> > --- a/gcc/cp/parser.cc
> > +++ b/gcc/cp/parser.cc
> > @@ -2947,6 +2947,8 @@ static bool cp_parser_next_token_ends_template_argument_p
> >     (cp_parser *);
> >   static bool cp_parser_nth_token_starts_template_argument_list_p
> >     (cp_parser *, size_t);
> > +static bool cp_parser_next_token_starts_initializer_p
> > +  (cp_parser *);
> >   static enum tag_types cp_parser_token_is_class_key
> >     (cp_token *);
> >   static enum tag_types cp_parser_token_is_type_parameter_key
> > @@ -11663,9 +11665,7 @@ cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
> >   	}
> >         /* Find the initializer for this capture.  */
> > -      if (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
> > -	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
> > -	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
> > +      if (cp_parser_next_token_starts_initializer_p (parser))
> >   	{
> >   	  /* An explicit initializer exists.  */
> >   	  if (cxx_dialect < cxx14)
> > @@ -11747,9 +11747,7 @@ cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
> >   		  /* If what follows is an initializer, the second '...' is
> >   		     invalid.  But for cases like [...xs...], the first one
> >   		     is invalid.  */
> > -		  if (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
> > -		      || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
> > -		      || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
> > +		  if (cp_parser_next_token_starts_initializer_p (parser))
> >   		    ellipsis_loc = loc;
> >   		  error_at (ellipsis_loc, "too many %<...%> in lambda capture");
> >   		  continue;
> > @@ -16047,6 +16045,58 @@ cp_parser_simple_declaration (cp_parser* parser,
> >       else
> >         break;
> > +  /* If we are still uncommitted, we're probably looking at something like
> > +     T(x), which can be a declaration but does not have to be, depending
> > +     on what comes after.  Consider
> > +       int(x), 0;
> > +     which is _not_ a declaration of x, it's a functional cast, and
> > +       int(x), (y);
> > +     which declares x and y.  We need some kind of look-ahead to decide,
> > +     cp_parser_init_declarator below will commit eagerly once it has seen
> > +     "int(x)".  So we try to parse this as a sequence of declarators; if
> > +     that fails, we are likely looking at a statement.  (We could avoid
> > +     all of this if there is no non-nested comma.)  */
> 
> Unfortunately, this seems to break
> 
> int main()
> {
>   int(x), y[sizeof(x)];
> }

Oy.  That's a problem, thanks for catching that.  So:

  void f(int v, int *w)
  {
    int(x), y[sizeof(x)]; // decl
    int(v), w[sizeof(v)], true; // expr
  }

The problem is that the cp_parser_declarator call I added was meant to
be purely syntactic checking -- does it _look_ like it could be a decl? --
but here it emits an error.

Is it worth experimenting with a new CP_PARSER_SYNTAX_ONLY flag to
prevent emitting any errors?

Marek
Jason Merrill July 23, 2024, 9:12 p.m. UTC | #3
On 7/23/24 4:18 PM, Marek Polacek wrote:
> On Tue, Jul 23, 2024 at 12:53:07AM -0400, Jason Merrill wrote:
>> On 7/20/24 2:31 PM, Marek Polacek wrote:
>>> [ Entering the contest to fix the oldest PR in this cycle. ]
>>>
>>> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
>>>
>>> -- >8 --
>>> This 18-year-old PR reports that we parse certain comma expressions
>>> as a declaration rather than statement when the statement begins with
>>> a functional-style cast expression.  Consider
>>>
>>>     int(x), 0;
>>>
>>> which does not declare x--it only casts x to int--, whereas
>>>
>>>     int(x), (y);
>>>
>>> declares x and y.  We need some kind of look-ahead to decide how we
>>> should disambiguate the construct, because cp_parser_init_declarator
>>> commits eagerly once it has seen "int(x)", and then it's too late to
>>> recover.
>>>
>>> This patch makes us try to parse the code as a sequence of declarators;
>>> if that fails, we are likely looking at a statement.  That's a simple
>>> idea, but it's complicated by code like
>>>
>>>     void (*p)(void *)(fun);
>>>
>>> which initializes a pointer-to-function, or
>>>
>>>     int(x), (x) + 1;
>>>
>>> which is an expression statement, but the second (x) is parsed as
>>> a valid declarator, only the + after reveals that the whole thing
>>> is an expression.  You can have things like
>>>
>>>     int(**p)
>>>
>>> which by itself doesn't tell you much.  You can have
>>>
>>>     int(*q)(void*)
>>>
>>> which looks like it starts with a functional-style cast, but it is not
>>> a cast.  The simple
>>>
>>>     int(x) = 42;
>>>
>>> has an initializer so it declares x; it is not an assignment.  But then,
>>>
>>>       int(d) __attribute__(());
>>>
>>> does not have an initializer, but the attribute makes it a declaration.
>>>
>>> 	PR c++/29834
>>> 	PR c++/54905
>>>
>>> gcc/cp/ChangeLog:
>>>
>>> 	* parser.cc (cp_parser_lambda_introducer): Use
>>> 	cp_parser_next_token_starts_initializer_p.
>>> 	(cp_parser_simple_declaration): Add look-ahead to decide if we're
>>> 	looking at a declaration or statement.
>>> 	(cp_parser_next_token_starts_initializer_p): New.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 	* g++.dg/parse/ambig15.C: New test.
>>> 	* g++.dg/parse/ambig16.C: New test.
>>> ---
>>>    gcc/cp/parser.cc                     | 73 ++++++++++++++++++++++--
>>>    gcc/testsuite/g++.dg/parse/ambig15.C | 83 ++++++++++++++++++++++++++++
>>>    gcc/testsuite/g++.dg/parse/ambig16.C | 18 ++++++
>>>    3 files changed, 168 insertions(+), 6 deletions(-)
>>>    create mode 100644 gcc/testsuite/g++.dg/parse/ambig15.C
>>>    create mode 100644 gcc/testsuite/g++.dg/parse/ambig16.C
>>>
>>> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
>>> index 1fa0780944b..797cfc3204e 100644
>>> --- a/gcc/cp/parser.cc
>>> +++ b/gcc/cp/parser.cc
>>> @@ -2947,6 +2947,8 @@ static bool cp_parser_next_token_ends_template_argument_p
>>>      (cp_parser *);
>>>    static bool cp_parser_nth_token_starts_template_argument_list_p
>>>      (cp_parser *, size_t);
>>> +static bool cp_parser_next_token_starts_initializer_p
>>> +  (cp_parser *);
>>>    static enum tag_types cp_parser_token_is_class_key
>>>      (cp_token *);
>>>    static enum tag_types cp_parser_token_is_type_parameter_key
>>> @@ -11663,9 +11665,7 @@ cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
>>>    	}
>>>          /* Find the initializer for this capture.  */
>>> -      if (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
>>> -	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
>>> -	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
>>> +      if (cp_parser_next_token_starts_initializer_p (parser))
>>>    	{
>>>    	  /* An explicit initializer exists.  */
>>>    	  if (cxx_dialect < cxx14)
>>> @@ -11747,9 +11747,7 @@ cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
>>>    		  /* If what follows is an initializer, the second '...' is
>>>    		     invalid.  But for cases like [...xs...], the first one
>>>    		     is invalid.  */
>>> -		  if (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
>>> -		      || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
>>> -		      || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
>>> +		  if (cp_parser_next_token_starts_initializer_p (parser))
>>>    		    ellipsis_loc = loc;
>>>    		  error_at (ellipsis_loc, "too many %<...%> in lambda capture");
>>>    		  continue;
>>> @@ -16047,6 +16045,58 @@ cp_parser_simple_declaration (cp_parser* parser,
>>>        else
>>>          break;
>>> +  /* If we are still uncommitted, we're probably looking at something like
>>> +     T(x), which can be a declaration but does not have to be, depending
>>> +     on what comes after.  Consider
>>> +       int(x), 0;
>>> +     which is _not_ a declaration of x, it's a functional cast, and
>>> +       int(x), (y);
>>> +     which declares x and y.  We need some kind of look-ahead to decide,
>>> +     cp_parser_init_declarator below will commit eagerly once it has seen
>>> +     "int(x)".  So we try to parse this as a sequence of declarators; if
>>> +     that fails, we are likely looking at a statement.  (We could avoid
>>> +     all of this if there is no non-nested comma.)  */
>>
>> Unfortunately, this seems to break
>>
>> int main()
>> {
>>    int(x), y[sizeof(x)];
>> }
> 
> Oy.  That's a problem, thanks for catching that.  So:
> 
>    void f(int v, int *w)
>    {
>      int(x), y[sizeof(x)]; // decl
>      int(v), w[sizeof(v)], true; // expr
>    }
> 
> The problem is that the cp_parser_declarator call I added was meant to
> be purely syntactic checking -- does it _look_ like it could be a decl? --
> but here it emits an error.
> 
> Is it worth experimenting with a new CP_PARSER_SYNTAX_ONLY flag to
> prevent emitting any errors?

I've wished for a while that we would store diagnostics during tentative 
parsing and then emit them all once we commit to the tentative parse.

With that functionality, we could parse the decls normally (but 
tentatively) and then go back and remove them from the current scope if 
they turn out not to have actually been declarations?

Extremely vexing.

Jason
Marek Polacek July 23, 2024, 9:36 p.m. UTC | #4
On Tue, Jul 23, 2024 at 05:12:14PM -0400, Jason Merrill wrote:
> On 7/23/24 4:18 PM, Marek Polacek wrote:
> > On Tue, Jul 23, 2024 at 12:53:07AM -0400, Jason Merrill wrote:
> > > On 7/20/24 2:31 PM, Marek Polacek wrote:
> > > > [ Entering the contest to fix the oldest PR in this cycle. ]
> > > > 
> > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > > 
> > > > -- >8 --
> > > > This 18-year-old PR reports that we parse certain comma expressions
> > > > as a declaration rather than statement when the statement begins with
> > > > a functional-style cast expression.  Consider
> > > > 
> > > >     int(x), 0;
> > > > 
> > > > which does not declare x--it only casts x to int--, whereas
> > > > 
> > > >     int(x), (y);
> > > > 
> > > > declares x and y.  We need some kind of look-ahead to decide how we
> > > > should disambiguate the construct, because cp_parser_init_declarator
> > > > commits eagerly once it has seen "int(x)", and then it's too late to
> > > > recover.
> > > > 
> > > > This patch makes us try to parse the code as a sequence of declarators;
> > > > if that fails, we are likely looking at a statement.  That's a simple
> > > > idea, but it's complicated by code like
> > > > 
> > > >     void (*p)(void *)(fun);
> > > > 
> > > > which initializes a pointer-to-function, or
> > > > 
> > > >     int(x), (x) + 1;
> > > > 
> > > > which is an expression statement, but the second (x) is parsed as
> > > > a valid declarator, only the + after reveals that the whole thing
> > > > is an expression.  You can have things like
> > > > 
> > > >     int(**p)
> > > > 
> > > > which by itself doesn't tell you much.  You can have
> > > > 
> > > >     int(*q)(void*)
> > > > 
> > > > which looks like it starts with a functional-style cast, but it is not
> > > > a cast.  The simple
> > > > 
> > > >     int(x) = 42;
> > > > 
> > > > has an initializer so it declares x; it is not an assignment.  But then,
> > > > 
> > > >       int(d) __attribute__(());
> > > > 
> > > > does not have an initializer, but the attribute makes it a declaration.
> > > > 
> > > > 	PR c++/29834
> > > > 	PR c++/54905
> > > > 
> > > > gcc/cp/ChangeLog:
> > > > 
> > > > 	* parser.cc (cp_parser_lambda_introducer): Use
> > > > 	cp_parser_next_token_starts_initializer_p.
> > > > 	(cp_parser_simple_declaration): Add look-ahead to decide if we're
> > > > 	looking at a declaration or statement.
> > > > 	(cp_parser_next_token_starts_initializer_p): New.
> > > > 
> > > > gcc/testsuite/ChangeLog:
> > > > 
> > > > 	* g++.dg/parse/ambig15.C: New test.
> > > > 	* g++.dg/parse/ambig16.C: New test.
> > > > ---
> > > >    gcc/cp/parser.cc                     | 73 ++++++++++++++++++++++--
> > > >    gcc/testsuite/g++.dg/parse/ambig15.C | 83 ++++++++++++++++++++++++++++
> > > >    gcc/testsuite/g++.dg/parse/ambig16.C | 18 ++++++
> > > >    3 files changed, 168 insertions(+), 6 deletions(-)
> > > >    create mode 100644 gcc/testsuite/g++.dg/parse/ambig15.C
> > > >    create mode 100644 gcc/testsuite/g++.dg/parse/ambig16.C
> > > > 
> > > > diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> > > > index 1fa0780944b..797cfc3204e 100644
> > > > --- a/gcc/cp/parser.cc
> > > > +++ b/gcc/cp/parser.cc
> > > > @@ -2947,6 +2947,8 @@ static bool cp_parser_next_token_ends_template_argument_p
> > > >      (cp_parser *);
> > > >    static bool cp_parser_nth_token_starts_template_argument_list_p
> > > >      (cp_parser *, size_t);
> > > > +static bool cp_parser_next_token_starts_initializer_p
> > > > +  (cp_parser *);
> > > >    static enum tag_types cp_parser_token_is_class_key
> > > >      (cp_token *);
> > > >    static enum tag_types cp_parser_token_is_type_parameter_key
> > > > @@ -11663,9 +11665,7 @@ cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
> > > >    	}
> > > >          /* Find the initializer for this capture.  */
> > > > -      if (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
> > > > -	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
> > > > -	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
> > > > +      if (cp_parser_next_token_starts_initializer_p (parser))
> > > >    	{
> > > >    	  /* An explicit initializer exists.  */
> > > >    	  if (cxx_dialect < cxx14)
> > > > @@ -11747,9 +11747,7 @@ cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
> > > >    		  /* If what follows is an initializer, the second '...' is
> > > >    		     invalid.  But for cases like [...xs...], the first one
> > > >    		     is invalid.  */
> > > > -		  if (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
> > > > -		      || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
> > > > -		      || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
> > > > +		  if (cp_parser_next_token_starts_initializer_p (parser))
> > > >    		    ellipsis_loc = loc;
> > > >    		  error_at (ellipsis_loc, "too many %<...%> in lambda capture");
> > > >    		  continue;
> > > > @@ -16047,6 +16045,58 @@ cp_parser_simple_declaration (cp_parser* parser,
> > > >        else
> > > >          break;
> > > > +  /* If we are still uncommitted, we're probably looking at something like
> > > > +     T(x), which can be a declaration but does not have to be, depending
> > > > +     on what comes after.  Consider
> > > > +       int(x), 0;
> > > > +     which is _not_ a declaration of x, it's a functional cast, and
> > > > +       int(x), (y);
> > > > +     which declares x and y.  We need some kind of look-ahead to decide,
> > > > +     cp_parser_init_declarator below will commit eagerly once it has seen
> > > > +     "int(x)".  So we try to parse this as a sequence of declarators; if
> > > > +     that fails, we are likely looking at a statement.  (We could avoid
> > > > +     all of this if there is no non-nested comma.)  */
> > > 
> > > Unfortunately, this seems to break
> > > 
> > > int main()
> > > {
> > >    int(x), y[sizeof(x)];
> > > }
> > 
> > Oy.  That's a problem, thanks for catching that.  So:
> > 
> >    void f(int v, int *w)
> >    {
> >      int(x), y[sizeof(x)]; // decl
> >      int(v), w[sizeof(v)], true; // expr
> >    }
> > 
> > The problem is that the cp_parser_declarator call I added was meant to
> > be purely syntactic checking -- does it _look_ like it could be a decl? --
> > but here it emits an error.
> > 
> > Is it worth experimenting with a new CP_PARSER_SYNTAX_ONLY flag to
> > prevent emitting any errors?
> 
> I've wished for a while that we would store diagnostics during tentative
> parsing and then emit them all once we commit to the tentative parse.

Yea, it's come up a few times, hasn't it.  It would fix

https://gcc.gnu.org/PR61259
https://gcc.gnu.org/PR109775

and I wonder what else.

> With that functionality, we could parse the decls normally (but tentatively)
> and then go back and remove them from the current scope if they turn out not
> to have actually been declarations?
> 
> Extremely vexing.

It's an interesting project, but these PRs are pretty obscure and have fairly
simple workarounds.  I think I better address other projects first...

Well, it was worth a shot.  Maybe I can at least add a testcase with your
testcase (we don't test it currently), and extract whatever already works
from my new test.

Marek
diff mbox series

Patch

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 1fa0780944b..797cfc3204e 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2947,6 +2947,8 @@  static bool cp_parser_next_token_ends_template_argument_p
   (cp_parser *);
 static bool cp_parser_nth_token_starts_template_argument_list_p
   (cp_parser *, size_t);
+static bool cp_parser_next_token_starts_initializer_p
+  (cp_parser *);
 static enum tag_types cp_parser_token_is_class_key
   (cp_token *);
 static enum tag_types cp_parser_token_is_type_parameter_key
@@ -11663,9 +11665,7 @@  cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
 	}
 
       /* Find the initializer for this capture.  */
-      if (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
-	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
-	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
+      if (cp_parser_next_token_starts_initializer_p (parser))
 	{
 	  /* An explicit initializer exists.  */
 	  if (cxx_dialect < cxx14)
@@ -11747,9 +11747,7 @@  cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
 		  /* If what follows is an initializer, the second '...' is
 		     invalid.  But for cases like [...xs...], the first one
 		     is invalid.  */
-		  if (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
-		      || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
-		      || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
+		  if (cp_parser_next_token_starts_initializer_p (parser))
 		    ellipsis_loc = loc;
 		  error_at (ellipsis_loc, "too many %<...%> in lambda capture");
 		  continue;
@@ -16047,6 +16045,58 @@  cp_parser_simple_declaration (cp_parser* parser,
     else
       break;
 
+  /* If we are still uncommitted, we're probably looking at something like
+     T(x), which can be a declaration but does not have to be, depending
+     on what comes after.  Consider
+       int(x), 0;
+     which is _not_ a declaration of x, it's a functional cast, and
+       int(x), (y);
+     which declares x and y.  We need some kind of look-ahead to decide,
+     cp_parser_init_declarator below will commit eagerly once it has seen
+     "int(x)".  So we try to parse this as a sequence of declarators; if
+     that fails, we are likely looking at a statement.  (We could avoid
+     all of this if there is no non-nested comma.)  */
+  if (cp_parser_uncommitted_to_tentative_parse_p (parser)
+      && cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN))
+    {
+      bool all_ok = true;
+      cp_lexer_save_tokens (parser->lexer);
+      /* Avoid committing to outer tentative parse.  This is here to parse
+	 "void (*p)(void *);" correctly.  */
+      tentative_firewall firewall (parser);
+      while (cp_lexer_next_token_is_not (parser->lexer, CPP_SEMICOLON))
+	{
+	  /* Try to parse what follows as a declarator.  */
+	  cp_declarator *d
+	    = cp_parser_declarator (parser, CP_PARSER_DECLARATOR_NAMED,
+				    CP_PARSER_FLAGS_NONE,
+				    /*ctor_dtor_or_conv_p=*/nullptr,
+				    /*parenthesized_p=*/nullptr,
+				    /*member_p=*/false,
+				    /*friend_p=*/false,
+				    /*static_p=*/false);
+	  if (cp_parser_error_occurred (parser) || d == cp_error_declarator)
+	    {
+	      all_ok = false;
+	      break;
+	    }
+	  /* If this was not a function-style cast, go ahead with a
+	     declaration.  */
+	  if (d->kind == cdk_function
+	      /* A declarator followed by an initializer makes this an
+		 init-declarator.  */
+	      || cp_parser_next_token_starts_initializer_p (parser)
+	      || cp_next_tokens_can_be_attribute_p (parser))
+	    break;
+	  if (cp_lexer_next_token_is (parser->lexer, CPP_COMMA))
+	    cp_lexer_consume_token (parser->lexer);
+	}
+      cp_lexer_rollback_tokens (parser->lexer);
+      if (!all_ok)
+	/* Not a declaration.  Bail and parse as a statement instead.  */
+	goto done;
+    }
+
   tree last_type;
   bool auto_specifier_p;
   /* NULL_TREE if both variable and function declaration are allowed,
@@ -34936,6 +34986,17 @@  cp_parser_nth_token_starts_template_argument_list_p (cp_parser * parser,
   return false;
 }
 
+/* Returns true if the next token can start an initializer; that is, it is
+   '=', '(', or '{'.  */
+
+static bool
+cp_parser_next_token_starts_initializer_p (cp_parser *parser)
+{
+  return (cp_lexer_next_token_is (parser->lexer, CPP_EQ)
+	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)
+	  || cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE));
+}
+
 /* Returns the kind of tag indicated by TOKEN, if it is a class-key,
    or none_type otherwise.  */
 
diff --git a/gcc/testsuite/g++.dg/parse/ambig15.C b/gcc/testsuite/g++.dg/parse/ambig15.C
new file mode 100644
index 00000000000..d086b2b6bac
--- /dev/null
+++ b/gcc/testsuite/g++.dg/parse/ambig15.C
@@ -0,0 +1,83 @@ 
+// PR c++/29834
+// { dg-do compile { target c++11 } }
+
+void fun (void *);
+using T = void(void*);
+
+struct A {
+  int foo ();
+};
+
+void
+f1 ()
+{
+  int(x), a, b, c, d, e, f, g, h, etc;
+  int(x), a, b, c, d, e, f, g, h, etc, (new int);
+}
+
+int
+f2 (int x, int *p, int **pp)
+{
+  // Statements.
+  int(x), 0;
+  (int(x), 0);
+  (int(x)), 0;
+  int(*p), 0;
+  int(**pp), !**pp;
+  int(x), x + 1;
+  int(x), (x) + 1;
+  int(x), x++;
+  int(x), --x;
+  int(p[1]), 0;
+
+  // Declarations.
+  int(a), (b), (c);
+  a = b = c = 42;
+  int(g) = 0, (h) = 0;
+  int(k), l __attribute__((unused));
+  int(&r) = x;
+  void (*p1)(void*) = fun;
+  void (*p2)(void*);
+  void (*p3)(void*) __attribute__((unused)) = fun;
+  void (**p4)(void*) __attribute__((unused));
+  void (*p5)(void*) __attribute__((unused));
+  void (p6)(void*) __attribute__((unused));
+  void (&p7)(void*) __attribute__((unused)) = fun;
+  void (*p8)(void*)(fun);
+  void (*p9)(void*){fun};
+  int (p10)(int), m;
+  int(d) __attribute__(());
+  int(e) __attribute__(()), f;
+  int (A::*foo)() = &A::foo;
+  int (A::*foo2)();
+  int (*A::*foo3)();
+  int(j[1]);
+  T(fun2);
+
+  return a + b + c + r;
+}
+
+struct Doh {
+  Doh(int) {}
+};
+
+
+int
+f3 (int x)
+{
+  Doh(x), ++x;
+  return Doh(x), x;
+}
+
+void
+bad (int x, int y, int z)
+{
+  int(x) = 1; // { dg-error "shadows a parameter" }
+  int(y)(1); // { dg-error "shadows a parameter" }
+  int(z){1}; // { dg-error "shadows a parameter" }
+  void (*p)(void*), 0; // { dg-error "" }
+  int(a),;  // { dg-error "expected" }
+  int(x) __attribute__(()), 0; // { dg-error "" }
+  int(i), i; // { dg-error "redeclaration" }
+  T(fun), ++x; // { dg-error "invalid cast" }
+}
diff --git a/gcc/testsuite/g++.dg/parse/ambig16.C b/gcc/testsuite/g++.dg/parse/ambig16.C
new file mode 100644
index 00000000000..51bc16dffcf
--- /dev/null
+++ b/gcc/testsuite/g++.dg/parse/ambig16.C
@@ -0,0 +1,18 @@ 
+// PR c++/54905
+// { dg-do compile }
+
+struct F
+{
+};
+
+F f;
+struct A
+{
+  A(F& s);
+};
+
+void
+foo ()
+{
+  A(f), 1;
+}