[RFA] Support common C++ declarations inside GTY'd structures

Message ID	20121006165900.GA4119@google.com
State	New
Headers	show Return-Path: <gcc-patches-return-328314-incoming=patchwork.ozlabs.org@gcc.gnu.org> Comment: DKIM? See http://www.dkim.org Comment: DomainKeys? See http://antispam.yahoo.com/domainkeys DomainKey-Signature: a=rsa-sha1; q=dns; c=nofws; s=default; d=gcc.gnu.org; h=Received:Received:X-SWARE-Spam-Status:X-Spam-Check-By:Received:Received:X-Google-DKIM-Signature:Received:Received:Received:Received:Date:From:To:Subject:Message-ID:MIME-Version:Content-Type:Content-Disposition:User-Agent:X-Gm-Message-State:X-IsSubscribed:Mailing-List:Precedence:List-Id:List-Unsubscribe:List-Archive:List-Post:List-Help:Sender:Delivered-To; b=E6EQM1+KPFeOxveX6x/8qaM053Jtwlhieix/JYRaaoOzincLC/3cMQ1GrK1o+X 3e81Uvrl8GZnnUEp+uCqhysxet5AQ27Hj9mMFLy7OjgJ8N/dczxBLGFsYqeZhnk1 A3K1C6qYq8RF6LFhDwWKMQt6k0fVjxT+LMTvFKy69qYTs=; Date: Sat, 6 Oct 2012 09:59:00 -0700 From: Diego Novillo <dnovillo@google.com> To: gcc-patches@gcc.gnu.org, Laurynas Biveinis <laurynas.biveinis@gmail.com>, Lawrence Crowl <crowl@google.com>, Aaron Gray <aaronngray.lists@gmail.com> Subject: [RFA] Support common C++ declarations inside GTY'd structures Message-ID: <20121006165900.GA4119@google.com> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org

diff --git a/gcc/doc/gty.texi b/gcc/doc/gty.texi index b2214b8..ea1a928 100644 --- a/gcc/doc/gty.texi +++ b/gcc/doc/gty.texi @@ -65,6 +65,27 @@ The parser understands simple typedefs such as @code{typedef int @var{name};}. These don't need to be marked. +Since @code{gengtype}'s understanding of C++ is limited, there are +several constructs and declarations that are not supported inside +classes/structures marked for automatic GC code generation. The +following C++ constructs produce a @code{gengtype} error on +structures/classes marked for automatic GC code generation: + +@itemize @bullet +@item +Type definitions inside classes/structures are not supported. +@item +Enumerations inside classes/structures are not supported. +@end itemize + +If you have a class or structure using any of the above constructs, +you need to mark that class as @code{GTY ((user))} and provide your +own marking routines (see section @ref{User GC} for details). + +It is always valid to include function definitions inside classes. +Those are always ignored by @code{gengtype}, as it only cares about +data members. + @menu * GTY Options:: What goes inside a @code{GTY(())}. * GGC Roots:: Making global variables GGC roots. diff --git a/gcc/gengtype-lex.l b/gcc/gengtype-lex.l index 5788a6a..fd80906 100644 --- a/gcc/gengtype-lex.l +++ b/gcc/gengtype-lex.l @@ -50,12 +50,15 @@ update_lineno (const char *l, size_t len) %} -ID [[:alpha:]_][[:alnum:]_]* +CID [[:alpha:]_][[:alnum:]_]* WS [[:space:]]+ HWS [ \t\r\v\f]* IWORD short|long|(un)?signed|char|int|HOST_WIDE_INT|HOST_WIDEST_INT|bool|size_t|BOOL_BITFIELD|CPPCHAR_SIGNED_T|ino_t|dev_t|HARD_REG_SET ITYPE {IWORD}({WS}{IWORD})* + /* Include '::' in identifiers to capture C++ scope qualifiers. */ +ID {CID}({HWS}::{HWS}{CID})* EOID [^[:alnum:]_] +CXX_KEYWORD inline|public:|private:|protected:|template|operator|friend %x in_struct in_struct_comment in_comment %option warn noyywrap nounput nodefault perf-report @@ -83,6 +86,10 @@ EOID [^[:alnum:]_] BEGIN(in_struct); return UNION; } +^{HWS}class/{EOID} { + BEGIN(in_struct); + return STRUCT; +} ^{HWS}extern/{EOID} { BEGIN(in_struct); return EXTERN; @@ -93,18 +100,27 @@ EOID [^[:alnum:]_] } } + /* Parsing inside a struct, union or class declaration. */ <in_struct>{ - "/*" { BEGIN(in_struct_comment); } +"//".*\n { lexer_line.line++; } {WS} { update_lineno (yytext, yyleng); } \\\n { lexer_line.line++; } "const"/{EOID} /* don't care */ +{CXX_KEYWORD}/{EOID} | +"~" | +"&" { + *yylval = XDUPVAR (const char, yytext, yyleng, yyleng + 1); + return IGNORABLE_CXX_KEYWORD; +} "GTY"/{EOID} { return GTY_TOKEN; } "VEC"/{EOID} { return VEC_TOKEN; } "union"/{EOID} { return UNION; } "struct"/{EOID} { return STRUCT; } +"class"/{EOID} { return STRUCT; } +"typedef"/{EOID} { return TYPEDEF; } "enum"/{EOID} { return ENUM; } "ptr_alias"/{EOID} { return PTR_ALIAS; } "nested_ptr"/{EOID} { return NESTED_PTR; } @@ -127,7 +143,6 @@ EOID [^[:alnum:]_] return SCALAR; } - {ID}/{EOID} { *yylval = XDUPVAR (const char, yytext, yyleng, yyleng+1); return ID; @@ -148,7 +163,7 @@ EOID [^[:alnum:]_] } "..." { return ELLIPSIS; } -[(){},*:<>;=%|-] { return yytext[0]; } +[(){},*:<>;=%|+-] { return yytext[0]; } /* ignore pp-directives */ ^{HWS}"#"{HWS}[a-z_]+[^\n]*\n {lexer_line.line++;} @@ -159,6 +174,7 @@ EOID [^[:alnum:]_] } "/*" { BEGIN(in_comment); } +"//".*\n { lexer_line.line++; } \n { lexer_line.line++; } {ID} | "'"("\\".|[^\\])"'" | @@ -172,6 +188,7 @@ EOID [^[:alnum:]_] [^*\n] /* do nothing */ "*"/[^/] /* do nothing */ } + <in_comment>"*/" { BEGIN(INITIAL); } <in_struct_comment>"*/" { BEGIN(in_struct); } diff --git a/gcc/gengtype-parse.c b/gcc/gengtype-parse.c index 03ee781..5737a15 100644 --- a/gcc/gengtype-parse.c +++ b/gcc/gengtype-parse.c @@ -87,6 +87,7 @@ static const char *const token_names[] = { "a string constant", "a character constant", "an array declarator", + "a C++ keyword to ignore" }; /* This array is indexed by token code minus FIRST_TOKEN_WITH_VALUE. */ @@ -98,6 +99,7 @@ static const char *const token_value_format[] = { "'\"%s\"'", "\"'%s'\"", "'[%s]'", + "'%s'", }; /* Produce a printable representation for a token defined by CODE and @@ -313,78 +315,77 @@ consume_balanced (int opener, int closer) } /* Absorb a sequence of tokens, possibly including ()[]{}-delimited - expressions, until we encounter a semicolon outside any such - delimiters; absorb that too. If IMMEDIATE is true, it is an error - if the semicolon is not the first token encountered. */ + expressions, until we encounter an end-of-statement marker (a ';' or + a '}') outside any such delimiters; absorb that too. */ + static void -consume_until_semi (bool immediate) +consume_until_eos (void) { - if (immediate && token () != ';') - require (';'); for (;;) switch (token ()) { case ';': advance (); return; - default: - advance (); - break; + + case '{': + consume_balanced ('{', '}'); + return; case '(': consume_balanced ('(', ')'); break; + case '[': consume_balanced ('[', ']'); break; - case '{': - consume_balanced ('{', '}'); - break; case '}': case ']': case ')': parse_error ("unmatched '%c' while scanning for ';'", token ()); - return; + return; case EOF_TOKEN: parse_error ("unexpected end of file while scanning for ';'"); return; + + default: + advance (); + break; } } /* Absorb a sequence of tokens, possibly including ()[]{}-delimited expressions, until we encounter a comma or semicolon outside any - such delimiters; absorb that too. If IMMEDIATE is true, it is an - error if the comma or semicolon is not the first token encountered. - Returns true if the loop ended with a comma. */ + such delimiters; absorb that too. Returns true if the loop ended + with a comma. */ + static bool -consume_until_comma_or_semi (bool immediate) +consume_until_comma_or_eos () { - if (immediate && token () != ',' && token () != ';') - require2 (',', ';'); for (;;) switch (token ()) { case ',': advance (); return true; + case ';': advance (); return false; - default: - advance (); - break; + + case '{': + consume_balanced ('{', '}'); + return false; case '(': consume_balanced ('(', ')'); break; + case '[': consume_balanced ('[', ']'); break; - case '{': - consume_balanced ('{', '}'); - break; case '}': case ']': @@ -396,6 +397,10 @@ consume_until_comma_or_semi (bool immediate) case EOF_TOKEN: parse_error ("unexpected end of file while scanning for ',' or ';'"); return false; + + default: + advance (); + break; } } @@ -548,6 +553,8 @@ gtymarker_opt (void) return 0; return gtymarker (); } + + /* Declarators. The logic here is largely lifted from c-parser.c. Note that we do not have to process abstract declarators, which can @@ -584,16 +591,21 @@ array_and_function_declarators_opt (type_p ty) return ty; } -static type_p inner_declarator (type_p, const char **, options_p *); +static type_p inner_declarator (type_p, const char **, options_p *, bool); /* direct_declarator: '(' inner_declarator ')' + '(' \epsilon ')' <-- C++ ctors/dtors gtymarker_opt ID array_and_function_declarators_opt Subroutine of declarator, mutually recursive with inner_declarator; - do not use elsewhere. */ + do not use elsewhere. + + IN_STRUCT is true if we are called while parsing structures or classes. */ + static type_p -direct_declarator (type_p ty, const char **namep, options_p *optsp) +direct_declarator (type_p ty, const char **namep, options_p *optsp, + bool in_struct) { /* The first token in a direct-declarator must be an ID, a GTY marker, or an open parenthesis. */ @@ -602,18 +614,45 @@ direct_declarator (type_p ty, const char **namep, options_p *optsp) case GTY_TOKEN: *optsp = gtymarker (); /* fall through */ + case ID: *namep = require (ID); + /* If the next token is '(', we are parsing a function declaration. + Functions are ignored by gengtype, so we return NULL. */ + if (token () == '(') + return NULL; break; case '(': + /* If the declarator starts with a '(', we have three options. We + are either parsing 'TYPE (*ID)' (i.e., a function pointer) + or 'TYPE(...)'. + + The latter will be a constructor iff we are inside a + structure or class. Otherwise, it could be a typedef, but + since we explicitly reject typedefs inside structures, we can + assume that we found a ctor and return NULL. */ advance (); - ty = inner_declarator (ty, namep, optsp); + if (in_struct && token () != '*') + { + /* Found a constructor. Find and consume the closing ')'. */ + while (token () != ')') + advance (); + advance (); + /* Tell the caller to ignore this. */ + return NULL; + } + ty = inner_declarator (ty, namep, optsp, in_struct); require (')'); break; + case IGNORABLE_CXX_KEYWORD: + /* Any C++ keyword like 'operator' means that we are not looking + at a regular data declarator. */ + return NULL; + default: - parse_error ("expected '(', 'GTY', or an identifier, have %s", + parse_error ("expected '(', ')', 'GTY', or an identifier, have %s", print_cur_token ()); /* Do _not_ advance if what we have is a close squiggle brace, as we will get much better error recovery that way. */ @@ -643,23 +682,26 @@ direct_declarator (type_p ty, const char **namep, options_p *optsp) direct_declarator Mutually recursive subroutine of direct_declarator; do not use - elsewhere. */ + elsewhere. + + IN_STRUCT is true if we are called while parsing structures or classes. */ static type_p -inner_declarator (type_p ty, const char **namep, options_p *optsp) +inner_declarator (type_p ty, const char **namep, options_p *optsp, + bool in_struct) { if (token () == '*') { type_p inner; advance (); - inner = inner_declarator (ty, namep, optsp); + inner = inner_declarator (ty, namep, optsp, in_struct); if (inner == 0) return 0; else return create_pointer (ty); } else - return direct_declarator (ty, namep, optsp); + return direct_declarator (ty, namep, optsp, in_struct); } /* declarator: '*'+ direct_declarator @@ -667,10 +709,15 @@ inner_declarator (type_p ty, const char **namep, options_p *optsp) This is the sole public interface to this part of the grammar. Arguments are the type known so far, a pointer to where the name may be stored, and a pointer to where GTY options may be stored. - Returns the final type. */ + + IN_STRUCT is true when we are called to parse declarators inside + a structure or class. + + Returns the final type. */ static type_p -declarator (type_p ty, const char **namep, options_p *optsp) +declarator (type_p ty, const char **namep, options_p *optsp, + bool in_struct = false) { *namep = 0; *optsp = 0; @@ -679,7 +726,7 @@ declarator (type_p ty, const char **namep, options_p *optsp) advance (); ty = create_pointer (ty); } - return direct_declarator (ty, namep, optsp); + return direct_declarator (ty, namep, optsp, in_struct); } /* Types and declarations. */ @@ -708,18 +755,19 @@ struct_field_seq (void) if (!ty || token () == ':') { - consume_until_semi (false); + consume_until_eos (); continue; } do { - dty = declarator (ty, &name, &dopts); + dty = declarator (ty, &name, &dopts, true); + /* There could be any number of weird things after the declarator, notably bitfield declarations and __attribute__s. If this function returns true, the last thing was a comma, so we have more than one declarator paired with the current type. */ - another = consume_until_comma_or_semi (false); + another = consume_until_comma_or_eos (); if (!dty) continue; @@ -760,7 +808,12 @@ opts_have (options_p opts, const char *str) Returns a partial type; under some conditions (notably "struct foo GTY((...)) thing;") it may write an options structure to *OPTSP. -*/ + + NESTED is true when parsing a declaration already known to have a + GTY marker. In these cases, typedef and enum declarations are not + allowed because gengtype only understands types at the global + scope. */ + static type_p type (options_p *optsp, bool nested) { @@ -777,6 +830,12 @@ type (options_p *optsp, bool nested) s = typedef_name (); return resolve_typedef (s, &lexer_line); + case IGNORABLE_CXX_KEYWORD: + /* By returning NULL here, we indicate to the caller that they + should ignore everything following this keyword up to the + next ';' or '}'. */ + return NULL; + case STRUCT: case UNION: { @@ -796,8 +855,8 @@ type (options_p *optsp, bool nested) /* Top-level structures that are not explicitly tagged GTY(()) are treated as mere forward declarations. This is because there are a lot of structures that we don't need to know - about, and some of those have weird macro stuff in them - that we can't handle. */ + about, and some of those have C++ and macro constructs that + we cannot handle. */ if (nested || token () == GTY_TOKEN) { is_gty = GTY_BEFORE_ID; @@ -819,6 +878,13 @@ type (options_p *optsp, bool nested) opts = gtymarker_opt (); } + if (token () == ':') + { + /* Skip over C++ inheritance specification. */ + while (token () != '{') + advance (); + } + if (is_gty) { bool is_user_gty = opts_have (opts, "user"); @@ -853,6 +919,21 @@ type (options_p *optsp, bool nested) return find_structure (s, kind); } + case TYPEDEF: + /* In C++, a typedef inside a struct/class/union defines a new + type for that inner scope. We cannot support this in + gengtype because we have no concept of scoping. + + We handle typedefs in the global scope separately (see + parse_file), so if we find a 'typedef', we must be inside + a struct. */ + gcc_assert (nested); + parse_error ("typedefs not supported in structures marked with " + "automatic GTY markers. Use GTY((user)) to mark " + "this structure."); + advance (); + return NULL; + case ENUM: advance (); if (token () == ID) @@ -864,6 +945,23 @@ type (options_p *optsp, bool nested) if (token () == '{') consume_balanced ('{', '}'); + + /* If after parsing the enum we are at the end of the statement, + and we are currently inside a structure, then this was an + enum declaration inside this scope. + + We cannot support this for the same reason we cannot support + 'typedef' inside structures (see the TYPEDEF handler above). + If this happens, emit an error and return NULL. */ + if (nested && token () == ';') + { + parse_error ("enum definitions not supported in structures marked " + "with automatic GTY markers. Use GTY((user)) to mark " + "this structure."); + advance (); + return NULL; + } + return create_scalar_type (s); default: @@ -901,7 +999,7 @@ typedef_decl (void) /* Yet another place where we could have junk (notably attributes) after the declarator. */ - another = consume_until_comma_or_semi (false); + another = consume_until_comma_or_eos (); if (dty) do_typedef (name, dty, &lexer_line); } diff --git a/gcc/gengtype-state.c b/gcc/gengtype-state.c index c94d50b..e3317ec 100644 --- a/gcc/gengtype-state.c +++ b/gcc/gengtype-state.c @@ -961,6 +961,8 @@ write_state_type (type_p current) current->state_number = state_written_type_count; switch (current->kind) { + case TYPE_NONE: + gcc_unreachable (); case TYPE_STRUCT: write_state_struct_type (current); break; @@ -988,9 +990,6 @@ write_state_type (type_p current) case TYPE_STRING: write_state_string_type (current); break; - - default: - fatal ("Unexpected type..."); } } @@ -1318,7 +1317,6 @@ read_state_scalar_char_type (type_p *type) read_state_common_type_content (*type); } - /* Read the string_type. */ static void read_state_string_type (type_p *type) diff --git a/gcc/gengtype.c b/gcc/gengtype.c index 2135676..b9fbd96 100644 --- a/gcc/gengtype.c +++ b/gcc/gengtype.c @@ -497,10 +497,10 @@ struct type scalar_char = { /* Lists of various things. */ -pair_p typedefs; -type_p structures; -type_p param_structs; -pair_p variables; +pair_p typedefs = NULL; +type_p structures = NULL; +type_p param_structs = NULL; +pair_p variables = NULL; static type_p find_param_structure (type_p t, type_p param[NUM_PARAM]); static type_p adjust_field_tree_exp (type_p t, options_p opt); @@ -611,6 +611,7 @@ resolve_typedef (const char *s, struct fileloc *pos) return create_user_defined_type (s, pos); } + /* Create and return a new structure with tag NAME at POS with fields FIELDS and options O. The KIND of structure must be one of TYPE_STRUCT, TYPE_UNION or TYPE_USER_STRUCT. */ @@ -676,8 +677,7 @@ new_structure (const char *name, enum typekind kind, struct fileloc *pos, structures = s; } - if (s->u.s.line.file != NULL - || (s->u.s.lang_struct && (s->u.s.lang_struct->u.s.bitmap & bitmap))) + if (s->u.s.lang_struct && (s->u.s.lang_struct->u.s.bitmap & bitmap)) { error_at_line (pos, "duplicate definition of '%s %s'", isunion ? "union" : "struct", s->u.s.tag); @@ -763,6 +763,7 @@ create_scalar_type (const char *name) return &scalar_nonchar; } + /* Return a pointer to T. */ type_p @@ -2636,7 +2637,7 @@ walk_type (type_p t, struct walk_type_data *d) /* If a pointer type is marked as "atomic", we process the field itself, but we don't walk the data that they point to. - + There are two main cases where we walk types: to mark pointers that are reachable, and to relocate pointers when writing a PCH file. In both cases, an atomic pointer is @@ -3514,7 +3515,7 @@ write_func_for_structure (type_p orig_s, type_p s, type_p *param, { oprintf (d.of, " %s (x);\n", mark_hook_name); } - + d.prev_val[2] = "*x"; d.indent = 6; if (orig_s->kind != TYPE_USER_STRUCT) diff --git a/gcc/gengtype.h b/gcc/gengtype.h index 4a178ec..e687e48 100644 --- a/gcc/gengtype.h +++ b/gcc/gengtype.h @@ -308,7 +308,6 @@ struct type { type_p param[NUM_PARAM]; /* The actual parameter types. */ struct fileloc line; /* The source location. */ } param_struct; - } u; }; @@ -444,38 +443,38 @@ extern void parse_file (const char *name); extern bool hit_error; /* Token codes. */ -enum - { - EOF_TOKEN = 0, - - /* Per standard convention, codes in the range (0, UCHAR_MAX] - represent single characters with those character codes. */ - - CHAR_TOKEN_OFFSET = UCHAR_MAX + 1, - GTY_TOKEN = CHAR_TOKEN_OFFSET, - TYPEDEF, - EXTERN, - STATIC, - UNION, - STRUCT, - ENUM, - VEC_TOKEN, - ELLIPSIS, - PTR_ALIAS, - NESTED_PTR, - USER_GTY, - PARAM_IS, - NUM, - SCALAR, - ID, - STRING, - CHAR, - ARRAY, - - /* print_token assumes that any token >= FIRST_TOKEN_WITH_VALUE may have - a meaningful value to be printed. */ - FIRST_TOKEN_WITH_VALUE = PARAM_IS - }; +enum gty_token +{ + EOF_TOKEN = 0, + + /* Per standard convention, codes in the range (0, UCHAR_MAX] + represent single characters with those character codes. */ + CHAR_TOKEN_OFFSET = UCHAR_MAX + 1, + GTY_TOKEN = CHAR_TOKEN_OFFSET, + TYPEDEF, + EXTERN, + STATIC, + UNION, + STRUCT, + ENUM, + VEC_TOKEN, + ELLIPSIS, + PTR_ALIAS, + NESTED_PTR, + USER_GTY, + PARAM_IS, + NUM, + SCALAR, + ID, + STRING, + CHAR, + ARRAY, + IGNORABLE_CXX_KEYWORD, + + /* print_token assumes that any token >= FIRST_TOKEN_WITH_VALUE may have + a meaningful value to be printed. */ + FIRST_TOKEN_WITH_VALUE = PARAM_IS +}; /* Level for verbose messages, e.g. output file generation... */

[RFA] Support common C++ declarations inside GTY'd structures

Commit Message

Comments

Patch