[6/7] json: add json parsing support

Message ID	20240815181156.1815075-7-dmalcolm@redhat.com
State	New
Headers	show Return-Path: <gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org> DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 24C903858431 From: David Malcolm <dmalcolm@redhat.com> To: gcc-patches@gcc.gnu.org Cc: David Malcolm <dmalcolm@redhat.com> Subject: [PATCH 6/7] json: add json parsing support Date: Thu, 15 Aug 2024 14:11:55 -0400 Message-Id: <20240815181156.1815075-7-dmalcolm@redhat.com> In-Reply-To: <20240815181156.1815075-1-dmalcolm@redhat.com> References: <20240815181156.1815075-1-dmalcolm@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="US-ASCII"; x-default=true Precedence: list Errors-To: gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org
Series	v3 of libdiagnostics \| expand [0/7] v3 of libdiagnostics [1/7] libdiagnostics v3: header [2/7] libdiagnostics v3: implementation [3/7] libdiagnostics v3: add C++ wrapper API [4/7] testsuite: move dg-test cleanup code from gcc-dg.exp to its own file [5/7] libdiagnostics v3: test suite [6/7] json: add json parsing support [7/7] libdiagnostics: add a "sarif-replay" command-line tool [PR96032]

diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 3e4c7bd645f9..64dcaddfdfbe 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1832,7 +1832,7 @@ OBJS-libcommon = diagnostic-spec.o diagnostic.o diagnostic-color.o \ diagnostic-show-locus.o \ edit-context.o \ pretty-print.o intl.o \ - json.o \ + json.o json-parsing.o \ sbitmap.o \ vec.o input.o hash-table.o ggc-none.o memory-block.o \ selftest.o selftest-diagnostic.o sort.o \ diff --git a/gcc/json-parsing.cc b/gcc/json-parsing.cc new file mode 100644 index 000000000000..78188c4fef9c --- /dev/null +++ b/gcc/json-parsing.cc @@ -0,0 +1,2394 @@ +/* JSON parsing + Copyright (C) 2017-2024 Free Software Foundation, Inc. + Contributed by David Malcolm <dmalcolm@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#define INCLUDE_MEMORY +#include "system.h" +#include "coretypes.h" +#include "json-parsing.h" +#include "pretty-print.h" +#include "math.h" +#include "make-unique.h" +#include "selftest.h" + +using namespace json; + +/* Declarations relating to parsing JSON, all within an + anonymous namespace. */ + +namespace { + +/* A typedef representing a single unicode character. */ + +typedef unsigned unichar; + +/* An enum for discriminating different kinds of JSON token. */ + +enum token_id +{ + TOK_ERROR, + + TOK_EOF, + + /* Punctuation. 
*/ + TOK_OPEN_SQUARE, + TOK_OPEN_CURLY, + TOK_CLOSE_SQUARE, + TOK_CLOSE_CURLY, + TOK_COLON, + TOK_COMMA, + + /* Literal names. */ + TOK_TRUE, + TOK_FALSE, + TOK_NULL, + + TOK_STRING, + TOK_FLOAT_NUMBER, + TOK_INTEGER_NUMBER +}; + +/* Human-readable descriptions of enum token_id. */ + +static const char *token_id_name[] = { + "error", + "EOF", + "'['", + "'{'", + "']'", + "'}'", + "':'", + "','", + "'true'", + "'false'", + "'null'", + "string", + "number", + "number" +}; + +/* Tokens within the JSON lexer. */ + +struct token +{ + /* The kind of token. */ + enum token_id id; + + /* The location of this token within the unicode + character stream. */ + location_map::range range; + + union + { + /* Value for TOK_ERROR and TOK_STRING. */ + char *string; + + /* Value for TOK_FLOAT_NUMBER. */ + double float_number; + + /* Value for TOK_INTEGER_NUMBER. */ + long integer_number; + } u; +}; + +/* A class for lexing JSON. */ + +class lexer +{ + public: + lexer (bool support_comments); + ~lexer (); + + std::unique_ptr<error> add_utf8 (size_t length, const char *utf8_buf); + + const token *peek (); + + void consume (); + + private: + bool get_char (unichar &out_char, location_map::point *out_point); + void unget_char (); + location_map::point get_next_point () const; + static void dump_token (FILE *outf, const token *tok); + void lex_token (token *out); + void lex_string (token *out); + void lex_number (token *out, unichar first_char); + bool rest_of_literal (token *out, const char *suffix); + std::unique_ptr<error> make_error (const char *msg); + bool consume_single_line_comment (token *out); + bool consume_multiline_comment (token *out); + + private: + auto_vec<unichar> m_buffer; + int m_next_char_idx; + int m_next_char_line; + int m_next_char_column; + int m_prev_line_final_column; /* for handling unget_char after a '\n'. 
*/ + + static const int MAX_TOKENS = 1; + token m_next_tokens[MAX_TOKENS]; + int m_num_next_tokens; + + bool m_support_comments; +}; + +/* A class for parsing JSON. */ + +class parser +{ + public: + parser (location_map *out_loc_map, + bool support_comments); + ~parser (); + + std::unique_ptr<error> + add_utf8 (size_t length, const char *utf8_buf); + + parser_result_t parse_value (int depth); + parser_result_t parse_object (int depth); + parser_result_t parse_array (int depth); + + std::unique_ptr<error> + require_eof (); + + private: + location_map::point get_next_token_start (); + location_map::point get_next_token_end (); + + std::unique_ptr<error> + require (enum token_id tok_id); + + result<enum token_id, std::unique_ptr<error>> + require_one_of (enum token_id tok_id_a, enum token_id tok_id_b); + + std::unique_ptr<error> + error_at (const location_map::range &r, + const char *fmt, ...) ATTRIBUTE_PRINTF_3; + + void maybe_record_range (json::value *jv, const location_map::range &r); + void maybe_record_range (json::value *jv, + const location_map::point &start, + const location_map::point &end); + + private: + lexer m_lexer; + location_map *m_loc_map; +}; + +} // anonymous namespace for parsing implementation + +/* Parser implementation. */ + +/* lexer's ctor. */ + +lexer::lexer (bool support_comments) +: m_buffer (), m_next_char_idx (0), + m_next_char_line (1), m_next_char_column (0), + m_prev_line_final_column (-1), + m_num_next_tokens (0), + m_support_comments (support_comments) +{ +} + +/* lexer's dtor. */ + +lexer::~lexer () +{ + while (m_num_next_tokens > 0) + consume (); +} + +/* Peek the next token. */ + +const token * +lexer::peek () +{ + if (m_num_next_tokens == 0) + { + lex_token (&m_next_tokens[0]); + m_num_next_tokens++; + } + return &m_next_tokens[0]; +} + +/* Consume the next token. 
*/
+
+void
+lexer::consume ()
+{
+  /* Guarantee that there is a lexed token to discard.  */
+  if (m_num_next_tokens == 0)
+    peek ();
+
+  gcc_assert (m_num_next_tokens > 0);
+  gcc_assert (m_num_next_tokens <= MAX_TOKENS);
+
+  /* Debugging aid, disabled by default.  */
+  if (0)
+    {
+      fprintf (stderr, "consuming token: ");
+      dump_token (stderr, &m_next_tokens[0]);
+      fprintf (stderr, "\n");
+    }
+
+  /* Error and string tokens carry a string that must be released.  */
+  token &front = m_next_tokens[0];
+  switch (front.id)
+    {
+    case TOK_ERROR:
+    case TOK_STRING:
+      free (front.u.string);
+      break;
+    default:
+      break;
+    }
+
+  /* Drop the front token, shifting any remaining ones down.  */
+  --m_num_next_tokens;
+  memmove (&m_next_tokens[0], &m_next_tokens[1],
+           sizeof (token) * m_num_next_tokens);
+}
+
+/* Decode LENGTH bytes of UTF-8 text starting at UTF8_BUF and append the
+   resulting code points to this lexer's buffer.
+   Return null on success, or an error describing the first ill-formed
+   sequence.  Code points decoded before the failure stay in the buffer.  */
+
+std::unique_ptr<error>
+lexer::add_utf8 (size_t length, const char *utf8_buf)
+{
+  /* Adapted from charset.c:one_utf8_to_cppchar.
+     For a sequence of N bytes, masks[N-1] selects the payload bits of the
+     lead byte and patns[N-1] is the marker its remaining bits must equal.  */
+  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
+  static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+  const uchar *cursor = (const unsigned char *) (utf8_buf);
+
+  while (length > 0)
+    {
+      unichar c = *cursor;
+
+      /* Single-byte (ASCII) case.  */
+      if (c < 0x80)
+        {
+          m_buffer.safe_push (c);
+          ++cursor;
+          --length;
+          continue;
+        }
+
+      /* The number of leading 1-bits in the first byte indicates how many
+         bytes the sequence occupies; try lengths 2 through 6.  */
+      size_t nbytes = 2;
+      while (nbytes < 7 && (c & ~masks[nbytes-1]) != patns[nbytes-1])
+        nbytes++;
+      if (nbytes == 7)
+        return make_error ("ill-formed UTF-8 sequence");
+
+      /* The input must contain the whole sequence.  */
+      if (length < nbytes)
+        return make_error ("ill-formed UTF-8 sequence");
+
+      /* Accumulate six payload bits from each continuation byte.  */
+      c &= masks[nbytes-1];
+      for (size_t i = 1; i < nbytes; i++)
+        {
+          const unichar n = cursor[i];
+          if ((n & 0xC0) != 0x80)
+            return make_error ("ill-formed UTF-8 sequence");
+          c = ((c << 6) + (n & 0x3F));
+        }
+
+      /* Reject encodings that are longer than the code point needs.  */
+      if ((c <= 0x7F && nbytes > 1)
+          || (c <= 0x7FF && nbytes > 2)
+          || (c <= 0xFFFF && nbytes > 3)
+          || (c <= 0x1FFFFF && nbytes > 4)
+          || (c <= 0x3FFFFFF && nbytes > 5))
+        return make_error ("ill-formed UTF-8:"
+                           " shortest possible encoding not used");
+
+      /* Reject out-of-range values and the surrogate range.  */
+      if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
+        return make_error ("ill-formed UTF-8: invalid character");
+
+      m_buffer.safe_push (c);
+      cursor += nbytes;
+      length -= nbytes;
+    }
+  return nullptr;
+}
+
+/* Try to read the next unicode character from this lexer's buffer.
+   On success store it in OUT_CHAR, store its location in *OUT_POINT
+   (when OUT_POINT is non-null), advance the read position and return true.
+   Return false when the buffer is exhausted.  */
+
+bool
+lexer::get_char (unichar &out_char, location_map::point *out_point)
+{
+  const bool at_end = m_next_char_idx >= (int)m_buffer.length ();
+  if (at_end)
+    return false;
+
+  if (out_point)
+    *out_point = get_next_point ();
+  out_char = m_buffer[m_next_char_idx];
+  m_next_char_idx++;
+
+  if (out_char != '\n')
+    {
+      m_next_char_column++;
+      return true;
+    }
+
+  /* Start a new line, remembering where the old one ended so that a
+     single unget_char can step back over the newline.  */
+  m_next_char_line++;
+  m_prev_line_final_column = m_next_char_column;
+  m_next_char_column = 0;
+  return true;
+}
+
+/* Step back over the character returned by the last successful
+   get_char.  */
+
+void
+lexer::unget_char ()
+{
+  --m_next_char_idx;
+  if (m_next_char_column > 0)
+    {
+      --m_next_char_column;
+      return;
+    }
+
+  /* Column 0: move back to the end of the previous line.  */
+  m_next_char_line--;
+  m_next_char_column = m_prev_line_final_column;
+  /* We don't support more than one unget_char in a row.  */
+  gcc_assert (m_prev_line_final_column != -1);
+  m_prev_line_final_column = -1;
+}
+
+/* Return the location of the character that get_char would read next.  */
+
+location_map::point
+lexer::get_next_point () const
+{
+  location_map::point pt;
+  pt.m_line = m_next_char_line;
+  pt.m_column = m_next_char_column;
+  pt.m_unichar_idx = m_next_char_idx;
+  return pt;
+}
+
+/* Print a textual representation of TOK to OUTF.
+   This is intended for debugging the lexer and parser,
+   rather than for user-facing output. 
*/
+
+void
+lexer::dump_token (FILE *outf, const token *tok)
+{
+  /* Tokens with a payload are printed directly; every other kind only
+     selects the name that is printed after the switch.  */
+  const char *plain_name = nullptr;
+  switch (tok->id)
+    {
+    case TOK_ERROR:
+      fprintf (outf, "TOK_ERROR (\"%s\")", tok->u.string);
+      return;
+
+    case TOK_STRING:
+      fprintf (outf, "TOK_STRING (\"%s\")", tok->u.string);
+      return;
+
+    case TOK_FLOAT_NUMBER:
+      fprintf (outf, "TOK_FLOAT_NUMBER (%f)", tok->u.float_number);
+      return;
+
+    case TOK_INTEGER_NUMBER:
+      fprintf (outf, "TOK_INTEGER_NUMBER (%ld)", tok->u.integer_number);
+      return;
+
+    case TOK_EOF:
+      plain_name = "TOK_EOF";
+      break;
+
+    case TOK_OPEN_SQUARE:
+      plain_name = "TOK_OPEN_SQUARE";
+      break;
+
+    case TOK_OPEN_CURLY:
+      plain_name = "TOK_OPEN_CURLY";
+      break;
+
+    case TOK_CLOSE_SQUARE:
+      plain_name = "TOK_CLOSE_SQUARE";
+      break;
+
+    case TOK_CLOSE_CURLY:
+      plain_name = "TOK_CLOSE_CURLY";
+      break;
+
+    case TOK_COLON:
+      plain_name = "TOK_COLON";
+      break;
+
+    case TOK_COMMA:
+      plain_name = "TOK_COMMA";
+      break;
+
+    case TOK_TRUE:
+      plain_name = "TOK_TRUE";
+      break;
+
+    case TOK_FALSE:
+      plain_name = "TOK_FALSE";
+      break;
+
+    case TOK_NULL:
+      plain_name = "TOK_NULL";
+      break;
+
+    default:
+      gcc_unreachable ();
+      break;
+    }
+  fprintf (outf, "%s", plain_name);
+}
+
+/* Treat "//" as a comment to the end of the line.
+
+   This isn't compliant with the JSON spec,
+   but is very handy for writing DejaGnu tests.
+
+   Return true if EOF and populate *OUT, false otherwise.  */
+
+bool
+lexer::consume_single_line_comment (token *out)
+{
+  /* Discard characters up to and including the newline.  */
+  unichar ch;
+  while (get_char (ch, nullptr))
+    if (ch == '\n')
+      return false;
+
+  /* The buffer ended before a newline: hand back an EOF token located
+     at the end of the input.  */
+  const location_map::point eof_point = get_next_point ();
+  out->id = TOK_EOF;
+  out->range.m_start = eof_point;
+  out->range.m_end = eof_point;
+  return true;
+}
+
+/* Treat '/' '*' as a multiline comment until the next closing '*' '/'.
+
+   This isn't compliant with the JSON spec,
+   but is very handy for writing DejaGnu tests.
+
+   Return true if EOF and populate *OUT, false otherwise. 
*/
+
+bool
+lexer::consume_multiline_comment (token *out)
+{
+  /* True when the previously read character was a '*', i.e. a '/' read
+     now would close the comment.  The '*' of the opening delimiter was
+     consumed by the caller and deliberately does not count, so a '/'
+     directly after the opening delimiter does not close the comment.
+     Tracking this (rather than reading one character after each '*')
+     also recognizes a closing delimiter preceded by extra '*'s.  */
+  bool prev_was_star = false;
+  while (1)
+    {
+      unichar next_char;
+      if (!get_char (next_char, nullptr))
+        {
+          /* Unterminated comment.  Report it as an error token, with a
+             message that lexer::consume can later free.  */
+          location_map::point p = get_next_point ();
+          out->id = TOK_ERROR;
+          out->range.m_start = p;
+          out->range.m_end = p;
+          out->u.string = xstrdup ("EOF within comment");
+          return true;
+        }
+      if (prev_was_star && next_char == '/')
+        return false;
+      prev_was_star = (next_char == '*');
+    }
+}
+
+/* Attempt to lex the input buffer, writing the next token to OUT.
+   Whitespace is skipped, as are comments when the lexer was created
+   with comment support.
+   On errors, TOK_ERROR (or TOK_EOF) is written to OUT.  */
+
+void
+lexer::lex_token (token *out)
+{
+  /* Skip to next non-whitespace char.  */
+  unichar next_char;
+  location_map::point start_point;
+  while (1)
+    {
+      if (!get_char (next_char, &start_point))
+        {
+          out->id = TOK_EOF;
+          location_map::point p = get_next_point ();
+          out->range.m_start = p;
+          out->range.m_end = p;
+          return;
+        }
+      if (m_support_comments)
+        if (next_char == '/')
+          {
+            location_map::point point;
+            unichar next_next_char;
+            if (get_char (next_next_char, &point))
+              {
+                switch (next_next_char)
+                  {
+                  case '/':
+                    if (consume_single_line_comment (out))
+                      return;
+                    continue;
+                  case '*':
+                    if (consume_multiline_comment (out))
+                      return;
+                    continue;
+                  default:
+                    /* A stray single '/'.  Put back the character that
+                       followed it, then break out of the loop so that
+                       the '/' is handled below as an unexpected
+                       character.  */
+                    unget_char ();
+                    goto non_whitespace;
+                  }
+              }
+            /* A '/' at the very end of the input falls through to the
+               whitespace test below and is then reported as an
+               unexpected character.  */
+          }
+      if (next_char != ' '
+          && next_char != '\t'
+          && next_char != '\n'
+          && next_char != '\r')
+        break;
+    }
+
+ non_whitespace:
+
+  /* Single-character tokens start and end at the same point; the
+     multi-character lexing routines below extend the end.  */
+  out->range.m_start = start_point;
+  out->range.m_end = start_point;
+
+  switch (next_char)
+    {
+    case '[':
+      out->id = TOK_OPEN_SQUARE;
+      break;
+
+    case '{':
+      out->id = TOK_OPEN_CURLY;
+      break;
+
+    case ']':
+      out->id = TOK_CLOSE_SQUARE;
+      break;
+
+    case '}':
+      out->id = TOK_CLOSE_CURLY;
+      break;
+
+    case ':':
+      out->id = TOK_COLON;
+      break;
+
+    case ',':
+      out->id = TOK_COMMA;
+      break;
+
+    case '"':
+      lex_string (out);
+      break;
+
+    case '-':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      lex_number (out, next_char);
+      break;
+
+    case 't':
+      /* Handle literal "true".  */
+      if (rest_of_literal (out, "rue"))
+        {
+          out->id = TOK_TRUE;
+          break;
+        }
+      else
+        goto err;
+
+    case 'f':
+      /* Handle literal "false".  */
+      if (rest_of_literal (out, "alse"))
+        {
+          out->id = TOK_FALSE;
+          break;
+        }
+      else
+        goto err;
+
+    case 'n':
+      /* Handle literal "null".  */
+      if (rest_of_literal (out, "ull"))
+        {
+          out->id = TOK_NULL;
+          break;
+        }
+      else
+        goto err;
+
+    err:
+    default:
+      /* Anything else, including a literal that did not match.  */
+      out->id = TOK_ERROR;
+      out->u.string = xasprintf ("unexpected character: '%c'", next_char);
+      break;
+    }
+}
+
+/* Having consumed an open-quote character from the lexer's buffer, attempt
+   to lex the rest of a JSON string, writing the result to OUT (or TOK_ERROR)
+   if an error occurred.
+   (ECMA-404 section 9; RFC 7159 section 7). 
*/ + +void +lexer::lex_string (token *out) +{ + auto_vec<unichar> content; + bool still_going = true; + while (still_going) + { + unichar uc; + if (!get_char (uc, &out->range.m_end)) + { + out->id = TOK_ERROR; + out->range.m_end = get_next_point (); + out->u.string = xstrdup ("EOF within string"); + return; + } + switch (uc) + { + case '"': + still_going = false; + break; + case '\\': + { + unichar next_char; + if (!get_char (next_char, &out->range.m_end)) + { + out->id = TOK_ERROR; + out->range.m_end = get_next_point (); + out->u.string = xstrdup ("EOF within string");; + return; + } + switch (next_char) + { + case '"': + case '\\': + case '/': + content.safe_push (next_char); + break; + + case 'b': + content.safe_push ('\b'); + break; + + case 'f': + content.safe_push ('\f'); + break; + + case 'n': + content.safe_push ('\n'); + break; + + case 'r': + content.safe_push ('\r'); + break; + + case 't': + content.safe_push ('\t'); + break; + + case 'u': + { + unichar result = 0; + for (int i = 0; i < 4; i++) + { + unichar hexdigit; + if (!get_char (hexdigit, &out->range.m_end)) + { + out->id = TOK_ERROR; + out->range.m_end = get_next_point (); + out->u.string = xstrdup ("EOF within string"); + return; + } + result <<= 4; + if (hexdigit >= '0' && hexdigit <= '9') + result += hexdigit - '0'; + else if (hexdigit >= 'a' && hexdigit <= 'f') + result += (hexdigit - 'a') + 10; + else if (hexdigit >= 'A' && hexdigit <= 'F') + result += (hexdigit - 'A') + 10; + else + { + out->id = TOK_ERROR; + out->range.m_start = out->range.m_end; + out->u.string = xstrdup ("bogus hex char"); + return; + } + } + content.safe_push (result); + } + break; + + default: + out->id = TOK_ERROR; + out->u.string = xstrdup ("unrecognized escape char"); + return; + } + } + break; + + default: + /* Reject unescaped control characters U+0000 through U+001F + (ECMA-404 section 9 para 1; RFC 7159 section 7 para 1). 
*/ + if (uc <= 0x1f) + { + out->id = TOK_ERROR; + out->range.m_start = out->range.m_end; + out->u.string = xstrdup ("unescaped control char"); + return; + } + + /* Otherwise, add regular unicode code point. */ + content.safe_push (uc); + break; + } + } + + out->id = TOK_STRING; + + auto_vec<char> utf8_buf; + // Adapted from libcpp/charset.c:one_cppchar_to_utf8 + for (unsigned i = 0; i < content.length (); i++) + { + static const uchar masks[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE }; + size_t nbytes; + uchar buf[6], *p = &buf[6]; + unichar c = content[i]; + + nbytes = 1; + if (c < 0x80) + *--p = c; + else + { + do + { + *--p = ((c & 0x3F) | 0x80); + c >>= 6; + nbytes++; + } + while (c >= 0x3F || (c & limits[nbytes-1])); + *--p = (c | masks[nbytes-1]); + } + + while (p < &buf[6]) + utf8_buf.safe_push (*p++); + } + + out->u.string = XNEWVEC (char, utf8_buf.length () + 1); + for (unsigned i = 0; i < utf8_buf.length (); i++) + out->u.string[i] = utf8_buf[i]; + out->u.string[utf8_buf.length ()] = '\0'; +} + +/* Having consumed FIRST_CHAR, an initial digit or '-' character from + the lexer's buffer attempt to lex the rest of a JSON number, writing + the result to OUT (or TOK_ERROR) if an error occurred. + (ECMA-404 section 8; RFC 7159 section 6). */ + +void +lexer::lex_number (token *out, unichar first_char) +{ + bool negate = false; + double value = 0.0; + if (first_char == '-') + { + negate = true; + if (!get_char (first_char, &out->range.m_end)) + { + out->id = TOK_ERROR; + out->range.m_start = out->range.m_end; + out->u.string = xstrdup ("expected digit"); + return; + } + } + + if (first_char == '0') + value = 0.0; + else if (!ISDIGIT (first_char)) + { + out->id = TOK_ERROR; + out->range.m_start = out->range.m_end; + out->u.string = xstrdup ("expected digit"); + return; + } + else + { + /* Got a nonzero digit; expect zero or more digits. 
*/ + value = first_char - '0'; + while (1) + { + unichar uc; + location_map::point point; + if (!get_char (uc, &point)) + break; + if (ISDIGIT (uc)) + { + value *= 10; + value += uc -'0'; + out->range.m_end = point; + continue; + } + else + { + unget_char (); + break; + } + } + } + + /* Optional '.', followed by one or more decimals. */ + unichar next_char; + location_map::point point; + if (get_char (next_char, &point)) + { + if (next_char == '.') + { + /* Parse decimal digits. */ + bool had_digit = false; + double digit_factor = 0.1; + while (get_char (next_char, &point)) + { + if (!ISDIGIT (next_char)) + { + unget_char (); + break; + } + value += (next_char - '0') * digit_factor; + digit_factor *= 0.1; + had_digit = true; + out->range.m_end = point; + } + if (!had_digit) + { + out->id = TOK_ERROR; + out->range.m_start = point; + out->range.m_start = point; + out->u.string = xstrdup ("expected digit"); + return; + } + } + else + unget_char (); + } + + /* Parse 'e' and 'E'. */ + unichar exponent_char; + if (get_char (exponent_char, &point)) + { + if (exponent_char == 'e' || exponent_char == 'E') + { + /* Optional +/-. */ + unichar sign_char; + int exponent = 0; + bool negate_exponent = false; + bool had_exponent_digit = false; + if (!get_char (sign_char, &point)) + { + out->id = TOK_ERROR; + out->range.m_start = point; + out->range.m_start = point; + out->u.string = xstrdup ("EOF within exponent"); + return; + } + if (sign_char == '-') + negate_exponent = true; + else if (sign_char == '+') + ; + else if (ISDIGIT (sign_char)) + { + exponent = sign_char - '0'; + had_exponent_digit = true; + } + else + { + out->id = TOK_ERROR; + out->range.m_start = point; + out->range.m_start = point; + out->u.string + = xstrdup ("expected '-','+' or digit within exponent"); + return; + } + out->range.m_end = point; + + /* One or more digits (we might have seen the digit above, + though). 
*/ + while (1) + { + unichar uc; + location_map::point point; + if (!get_char (uc, &point)) + break; + if (ISDIGIT (uc)) + { + exponent *= 10; + exponent += uc -'0'; + had_exponent_digit = true; + out->range.m_end = point; + continue; + } + else + { + unget_char (); + break; + } + } + if (!had_exponent_digit) + { + out->id = TOK_ERROR; + out->range.m_start = point; + out->range.m_start = point; + out->u.string = xstrdup ("expected digit within exponent"); + return; + } + if (negate_exponent) + exponent = -exponent; + value = value * pow (10, exponent); + } + else + unget_char (); + } + + if (negate) + value = -value; + + if (value == (long)value) + { + out->id = TOK_INTEGER_NUMBER; + out->u.integer_number = value; + } + else + { + out->id = TOK_FLOAT_NUMBER; + out->u.float_number = value; + } +} + +/* Determine if the next characters to be lexed match SUFFIX. + SUFFIX must be pure ASCII and not contain newlines. + If so, consume the characters and return true. + Otherwise, return false. */ + +bool +lexer::rest_of_literal (token *out, const char *suffix) +{ + int suffix_idx = 0; + int buf_idx = m_next_char_idx; + while (1) + { + if (suffix[suffix_idx] == '\0') + { + m_next_char_idx += suffix_idx; + m_next_char_column += suffix_idx; + out->range.m_end.m_unichar_idx += suffix_idx; + out->range.m_end.m_column += suffix_idx; + return true; + } + if (buf_idx >= (int)m_buffer.length ()) + return false; + /* This assumes that suffix is ASCII. */ + if (m_buffer[buf_idx] != (unichar)suffix[suffix_idx]) + return false; + buf_idx++; + suffix_idx++; + } +} + +/* Create a new error instance for MSG, using the location of the next + character for the location of the error. 
*/
+
+std::unique_ptr<error>
+lexer::make_error (const char *msg)
+{
+  /* An empty range located at the next character to be read.  */
+  location_map::range r;
+  r.m_start = get_next_point ();
+  r.m_end = r.m_start;
+  return ::make_unique<error> (r, xstrdup (msg));
+}
+
+/* parser's ctor.
+   OUT_LOC_MAP may be null; if it is not, it is told about the range of
+   each value that gets parsed.  SUPPORT_COMMENTS is passed on to the
+   lexer.  */
+
+parser::parser (location_map *out_loc_map,
+                bool support_comments)
+: m_lexer (support_comments), m_loc_map (out_loc_map)
+{
+}
+
+/* parser's dtor.  Tell the location map, if any, that parsing is over.  */
+
+parser::~parser ()
+{
+  if (m_loc_map)
+    m_loc_map->on_finished_parsing ();
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this parser's
+   lexer's buffer.
+   Return whatever lexer::add_utf8 returns for that text.  */
+
+std::unique_ptr<error>
+parser::add_utf8 (size_t length, const char *utf8_buf)
+{
+  return m_lexer.add_utf8 (length, utf8_buf);
+}
+
+/* Parse a JSON value (object, array, number, string, or literal).
+   DEPTH is the current nesting depth; parsing fails once it reaches
+   the limit below.
+   Return the value, or an error located at the offending token.
+   (ECMA-404 section 5; RFC 7159 section 3).  */
+
+parser_result_t
+parser::parse_value (int depth)
+{
+  const token *tok = m_lexer.peek ();
+
+  /* Avoid stack overflow with deeply-nested inputs; RFC 7159 section 9
+     states: "An implementation may set limits on the maximum depth
+     of nesting.".
+
+     Ideally we'd avoid this limit (e.g. by rewriting parse_value,
+     parse_object, and parse_array into a single function with a vec of
+     state).  */
+  const int MAX_DEPTH = 100;
+  if (depth >= MAX_DEPTH)
+    return error_at (tok->range, "maximum nesting depth exceeded: %i",
+                     MAX_DEPTH);
+
+  /* In the scalar cases below, TOK is only read before the token is
+     consumed: consuming hands the token's storage back to the lexer.  */
+  switch (tok->id)
+    {
+    case TOK_OPEN_CURLY:
+      return parse_object (depth);
+
+    case TOK_STRING:
+      {
+        auto val = ::make_unique<string> (tok->u.string);
+        maybe_record_range (val.get (), tok->range);
+        m_lexer.consume ();
+        return parser_result_t (std::move (val));
+      }
+
+    case TOK_OPEN_SQUARE:
+      return parse_array (depth);
+
+    case TOK_FLOAT_NUMBER:
+      {
+        auto val = ::make_unique<float_number> (tok->u.float_number);
+        maybe_record_range (val.get (), tok->range);
+        m_lexer.consume ();
+        return parser_result_t (std::move (val));
+      }
+
+    case TOK_INTEGER_NUMBER:
+      {
+        auto val = ::make_unique<integer_number> (tok->u.integer_number);
+        maybe_record_range (val.get (), tok->range);
+        m_lexer.consume ();
+        return parser_result_t (std::move (val));
+      }
+
+    case TOK_TRUE:
+      {
+        auto val = ::make_unique<literal> (JSON_TRUE);
+        maybe_record_range (val.get (), tok->range);
+        m_lexer.consume ();
+        return parser_result_t (std::move (val));
+      }
+
+    case TOK_FALSE:
+      {
+        auto val = ::make_unique<literal> (JSON_FALSE);
+        maybe_record_range (val.get (), tok->range);
+        m_lexer.consume ();
+        return parser_result_t (std::move (val));
+      }
+
+    case TOK_NULL:
+      {
+        auto val = ::make_unique<literal> (JSON_NULL);
+        maybe_record_range (val.get (), tok->range);
+        m_lexer.consume ();
+        return parser_result_t (std::move (val));
+      }
+
+    case TOK_ERROR:
+      /* The lexer's message is carried in the token's string.  */
+      return error_at (tok->range, "invalid JSON token: %s", tok->u.string);
+
+    default:
+      return error_at (tok->range, "expected a JSON value but got %s",
+                       token_id_name[tok->id]);
+    }
+}
+
+/* Parse a JSON object.
+   (ECMA-404 section 6; RFC 7159 section 4). 
*/
+
+parser_result_t
+parser::parse_object (int depth)
+{
+  location_map::point start = get_next_token_start ();
+
+  /* Fail cleanly if the next token is not '{', as parse_array does
+     for '['.  */
+  if (auto err = require (TOK_OPEN_CURLY))
+    return parser_result_t (std::move (err));
+
+  auto obj = ::make_unique<object> ();
+
+  /* Empty object.  */
+  const token *tok = m_lexer.peek ();
+  if (tok->id == TOK_CLOSE_CURLY)
+    {
+      location_map::point end = get_next_token_end ();
+      maybe_record_range (obj.get (), start, end);
+      if (auto err = require (TOK_CLOSE_CURLY))
+        return parser_result_t (std::move (err));
+      return parser_result_t (std::move (obj));
+    }
+  if (tok->id != TOK_STRING)
+    return error_at (tok->range,
+                     "expected string for object key after '{'; got %s",
+                     token_id_name[tok->id]);
+
+  /* One iteration per "key : value" member.  */
+  while (true)
+    {
+      tok = m_lexer.peek ();
+      if (tok->id != TOK_STRING)
+        return error_at (tok->range,
+                         "expected string for object key after ','; got %s",
+                         token_id_name[tok->id]);
+      /* Copy the key: consuming the token frees the token's string.  */
+      label_text key = label_text::take (xstrdup (tok->u.string));
+      m_lexer.consume ();
+
+      if (auto err = require (TOK_COLON))
+        return parser_result_t (std::move (err));
+
+      parser_result_t r = parse_value (depth + 1);
+      if (r.m_err)
+        return r;
+      if (!r.m_val)
+        return parser_result_t (std::move (obj));
+
+      /* We don't enforce uniqueness for keys.  */
+      obj->set (key.get (), std::move (r.m_val));
+
+      /* Note where the next token ends before consuming it: if it is
+         the closing '}', that is where the object ends.  */
+      location_map::point end = get_next_token_end ();
+      result<enum token_id, std::unique_ptr<error>> result
+        (require_one_of (TOK_COMMA, TOK_CLOSE_CURLY));
+      if (result.m_err)
+        return parser_result_t (std::move (result.m_err));
+      if (result.m_val == TOK_COMMA)
+        continue;
+      else
+        {
+          /* TOK_CLOSE_CURLY.  */
+          maybe_record_range (obj.get (), start, end);
+          return parser_result_t (std::move (obj));
+        }
+    }
+}
+
+/* Parse a JSON array.
+   (ECMA-404 section 7; RFC 7159 section 5). 
*/ + +parser_result_t +parser::parse_array (int depth) +{ + location_map::point start = get_next_token_start (); + if (auto err = require (TOK_OPEN_SQUARE)) + return parser_result_t (std::move (err)); + + auto arr = ::make_unique<array> (); + + const token *tok = m_lexer.peek (); + if (tok->id == TOK_CLOSE_SQUARE) + { + location_map::point end = get_next_token_end (); + maybe_record_range (arr.get (), start, end); + m_lexer.consume (); + return parser_result_t (std::move (arr)); + } + + while (true) + { + parser_result_t r = parse_value (depth + 1); + if (r.m_err) + return r; + + arr->append (std::move (r.m_val)); + + location_map::point end = get_next_token_end (); + result<enum token_id, std::unique_ptr<error>> result + (require_one_of (TOK_COMMA, TOK_CLOSE_SQUARE)); + if (result.m_err) + return parser_result_t (std::move (result.m_err)); + if (result.m_val == TOK_COMMA) + continue; + else + { + /* TOK_CLOSE_SQUARE. */ + maybe_record_range (arr.get (), start, end); + return parser_result_t (std::move (arr)); + } + } +} + +/* Get the start point of the next token. */ + +location_map::point +parser::get_next_token_start () +{ + const token *tok = m_lexer.peek (); + return tok->range.m_start; +} + +/* Get the end point of the next token. */ + +location_map::point +parser::get_next_token_end () +{ + const token *tok = m_lexer.peek (); + return tok->range.m_end; +} + +/* Require an EOF, or fail if there is surplus input. */ + +std::unique_ptr<error> +parser::require_eof () +{ + return require (TOK_EOF); +} + +/* Consume the next token, issuing an error if it is not of kind TOK_ID. 
*/
+
+std::unique_ptr<error>
+parser::require (enum token_id tok_id)
+{
+  const token *next = m_lexer.peek ();
+  if (next->id == tok_id)
+    {
+      m_lexer.consume ();
+      return nullptr;
+    }
+
+  /* Mismatch: the token is left unconsumed.  For a lexer error, pass
+     on the lexer's own message.  */
+  if (next->id == TOK_ERROR)
+    return error_at (next->range,
+                     "expected %s; got bad token: %s",
+                     token_id_name[tok_id], next->u.string);
+  return error_at (next->range,
+                   "expected %s; got %s", token_id_name[tok_id],
+                   token_id_name[next->id]);
+}
+
+/* Consume the next token if it is of kind TOK_ID_A or TOK_ID_B and
+   return which of the two it was.
+   Otherwise leave it unconsumed and return an error.  */
+
+result<enum token_id, std::unique_ptr<error>>
+parser::require_one_of (enum token_id tok_id_a, enum token_id tok_id_b)
+{
+  const token *next = m_lexer.peek ();
+  const bool acceptable = (next->id == tok_id_a) || (next->id == tok_id_b);
+  if (acceptable)
+    {
+      /* Remember the kind before the token is discarded.  */
+      enum token_id seen = next->id;
+      m_lexer.consume ();
+      return result<enum token_id, std::unique_ptr<error>> (seen);
+    }
+
+  if (next->id == TOK_ERROR)
+    return error_at (next->range, "expected %s or %s; got bad token: %s",
+                     token_id_name[tok_id_a], token_id_name[tok_id_b],
+                     next->u.string);
+  return error_at (next->range, "expected %s or %s; got %s",
+                   token_id_name[tok_id_a], token_id_name[tok_id_b],
+                   token_id_name[next->id]);
+}
+
+/* Generate a parsing error located at R, with a message built
+   printf-style from FMT and the arguments that follow it.  */
+
+std::unique_ptr<error>
+parser::error_at (const location_map::range &r, const char *fmt, ...)
+{
+  va_list args;
+  va_start (args, fmt);
+  char *msg = xvasprintf (fmt, args);
+  va_end (args);
+
+  return ::make_unique<error> (r, msg);
+}
+
+/* If a location map was supplied, tell it that JV has range R within
+   the input file; otherwise do nothing.  */
+
+void
+parser::maybe_record_range (json::value *jv, const location_map::range &r)
+{
+  if (!m_loc_map)
+    return;
+  m_loc_map->record_range_for_value (jv, r);
+}
+
+/* Record that JV has range START to END within the input file. 
*/ + +void +parser::maybe_record_range (json::value *jv, + const location_map::point &start, + const location_map::point &end) +{ + if (m_loc_map) + { + location_map::range r; + r.m_start = start; + r.m_end = end; + m_loc_map->record_range_for_value (jv, r); + } +} + +/* Attempt to parse the UTF-8 encoded buffer at UTF8_BUF + of the given LENGTH. + If ALLOW_COMMENTS is true, then allow C and C++ style-comments in the + buffer, as an extension to JSON, otherwise forbid them. + If successful, return an json::value in the result. + if there was a problem, return a json::error in the result. + If OUT_LOC_MAP is non-NULL, notify *OUT_LOC_MAP about + source locations of nodes seen during parsing. */ + +parser_result_t +json::parse_utf8_string (size_t length, + const char *utf8_buf, + bool allow_comments, + location_map *out_loc_map) +{ + parser p (out_loc_map, allow_comments); + if (auto err = p.add_utf8 (length, utf8_buf)) + return parser_result_t (std::move (err)); + parser_result_t r = p.parse_value (0); + if (r.m_err) + return r; + if (auto err = p.require_eof ()) + return parser_result_t (std::move (err)); + return r; +} + +/* Attempt to parse the nil-terminated UTF-8 encoded buffer at + UTF8_BUF. + If ALLOW_COMMENTS is true, then allow C and C++ style-comments in the + buffer, as an extension to JSON, otherwise forbid them. + If successful, return a non-NULL json::value *. + if there was a problem, return NULL and write an error + message to err_out, which must be deleted by the caller. + If OUT_LOC_MAP is non-NULL, notify *OUT_LOC_MAP about + source locations of nodes seen during parsing. */ + +json::parser_result_t +json::parse_utf8_string (const char *utf8, + bool allow_comments, + location_map *out_loc_map) +{ + return parse_utf8_string (strlen (utf8), utf8, allow_comments, + out_loc_map); +} + + +#if CHECKING_P + +namespace selftest { + +/* Selftests. 
*/ + +#define ASSERT_PRINT_EQ(JV, FORMATTED, EXPECTED_JSON) \ + assert_print_eq (SELFTEST_LOCATION, JV, FORMATTED, EXPECTED_JSON) + +/* Implementation detail of ASSERT_RANGE_EQ. */ + +static void +assert_point_eq (const location &loc, + const location_map::point &actual_point, + size_t exp_unichar_idx, int exp_line, int exp_column) +{ + ASSERT_EQ_AT (loc, actual_point.m_unichar_idx, exp_unichar_idx); + ASSERT_EQ_AT (loc, actual_point.m_line, exp_line); + ASSERT_EQ_AT (loc, actual_point.m_column, exp_column); +} + +/* Implementation detail of ASSERT_RANGE_EQ. */ + +static void +assert_range_eq (const location &loc, + const location_map::range &actual_range, + /* Expected location. */ + size_t start_unichar_idx, int start_line, int start_column, + size_t end_unichar_idx, int end_line, int end_column) +{ + assert_point_eq (loc, actual_range.m_start, + start_unichar_idx, start_line, start_column); + assert_point_eq (loc, actual_range.m_end, + end_unichar_idx, end_line, end_column); +} + +/* Assert that ACTUAL_RANGE starts at + (START_UNICHAR_IDX, START_LINE, START_COLUMN) + and ends at (END_UNICHAR_IDX, END_LINE, END_COLUMN). */ + +#define ASSERT_RANGE_EQ(ACTUAL_RANGE, \ + START_UNICHAR_IDX, START_LINE, START_COLUMN, \ + END_UNICHAR_IDX, END_LINE, END_COLUMN) \ + assert_range_eq ((SELFTEST_LOCATION), (ACTUAL_RANGE), \ + (START_UNICHAR_IDX), (START_LINE), (START_COLUMN), \ + (END_UNICHAR_IDX), (END_LINE), (END_COLUMN)) + +/* Implementation detail of ASSERT_ERR_EQ. */ + +static void +assert_err_eq (const location &loc, + const json::error *actual_err, + /* Expected location. 
*/ + size_t start_unichar_idx, int start_line, int start_column, + size_t end_unichar_idx, int end_line, int end_column, + const char *expected_msg) +{ + ASSERT_TRUE_AT (loc, actual_err); + const location_map::range &actual_range = actual_err->get_range (); + ASSERT_EQ_AT (loc, actual_range.m_start.m_unichar_idx, start_unichar_idx); + ASSERT_EQ_AT (loc, actual_range.m_start.m_line, start_line); + ASSERT_EQ_AT (loc, actual_range.m_start.m_column, start_column); + ASSERT_EQ_AT (loc, actual_range.m_end.m_unichar_idx, end_unichar_idx); + ASSERT_EQ_AT (loc, actual_range.m_end.m_line, end_line); + ASSERT_EQ_AT (loc, actual_range.m_end.m_column, end_column); + ASSERT_STREQ_AT (loc, actual_err->get_msg (), expected_msg); +} + +/* Assert that ACTUAL_ERR is a non-NULL json::error *, + with message EXPECTED_MSG, and that its location starts + at (START_UNICHAR_IDX, START_LINE, START_COLUMN) + and ends at (END_UNICHAR_IDX, END_LINE, END_COLUMN). */ + +#define ASSERT_ERR_EQ(ACTUAL_ERR, \ + START_UNICHAR_IDX, START_LINE, START_COLUMN, \ + END_UNICHAR_IDX, END_LINE, END_COLUMN, \ + EXPECTED_MSG) \ + assert_err_eq ((SELFTEST_LOCATION), (ACTUAL_ERR), \ + (START_UNICHAR_IDX), (START_LINE), (START_COLUMN), \ + (END_UNICHAR_IDX), (END_LINE), (END_COLUMN), \ + (EXPECTED_MSG)) + +/* Verify that the JSON lexer works as expected. */ + +static void +test_lexer () +{ + lexer l (false); + const char *str + /* 0 1 2 3 4 . */ + /* 01234567890123456789012345678901234567890123456789. */ + = (" 1066 -1 \n" + " -273.15 1e6\n" + " [ ] null true false { } \"foo\" \n"); + auto err = l.add_utf8 (strlen (str), str); + ASSERT_EQ (err, nullptr); + + /* Line 1. */ + { + const size_t line_offset = 0; + + /* Expect token: "1066" in columns 4-7. 
*/ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER); + ASSERT_EQ (tok->u.integer_number, 1066); + ASSERT_RANGE_EQ (tok->range, + line_offset + 4, 1, 4, + line_offset + 7, 1, 7); + l.consume (); + } + /* Expect token: "-1" in columns 11-12. */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER); + ASSERT_EQ (tok->u.integer_number, -1); + ASSERT_RANGE_EQ (tok->range, + line_offset + 11, 1, 11, + line_offset + 12, 1, 12); + l.consume (); + } + } + + /* Line 2. */ + { + const size_t line_offset = 16; + + /* Expect token: "-273.15" in columns 4-10. */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_FLOAT_NUMBER); + ASSERT_EQ (int(tok->u.float_number), int(-273.15)); + ASSERT_RANGE_EQ (tok->range, + line_offset + 4, 2, 4, + line_offset + 10, 2, 10); + l.consume (); + } + /* Expect token: "1e6" in columns 12-14. */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER); + ASSERT_EQ (tok->u.integer_number, 1000000); + ASSERT_RANGE_EQ (tok->range, + line_offset + 12, 2, 12, + line_offset + 14, 2, 14); + l.consume (); + } + } + + /* Line 3. */ + { + const size_t line_offset = 32; + + /* Expect token: "[". */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_OPEN_SQUARE); + ASSERT_RANGE_EQ (tok->range, + line_offset + 2, 3, 2, + line_offset + 2, 3, 2); + l.consume (); + } + /* Expect token: "]". */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_CLOSE_SQUARE); + ASSERT_RANGE_EQ (tok->range, + line_offset + 6, 3, 6, + line_offset + 6, 3, 6); + l.consume (); + } + /* Expect token: "null". */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_NULL); + ASSERT_RANGE_EQ (tok->range, + line_offset + 8, 3, 8, + line_offset + 11, 3, 11); + l.consume (); + } + /* Expect token: "true". 
*/ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_TRUE); + ASSERT_RANGE_EQ (tok->range, + line_offset + 15, 3, 15, + line_offset + 18, 3, 18); + l.consume (); + } + /* Expect token: "false". */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_FALSE); + ASSERT_RANGE_EQ (tok->range, + line_offset + 21, 3, 21, + line_offset + 25, 3, 25); + l.consume (); + } + /* Expect token: "{". */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_OPEN_CURLY); + ASSERT_RANGE_EQ (tok->range, + line_offset + 28, 3, 28, + line_offset + 28, 3, 28); + l.consume (); + } + /* Expect token: "}". */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_CLOSE_CURLY); + ASSERT_RANGE_EQ (tok->range, + line_offset + 31, 3, 31, + line_offset + 31, 3, 31); + l.consume (); + } + /* Expect token: "\"foo\"". */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_STRING); + ASSERT_RANGE_EQ (tok->range, + line_offset + 34, 3, 34, + line_offset + 38, 3, 38); + l.consume (); + } + } +} + +/* Verify that the JSON lexer complains about single-line comments + when comments are disabled. */ + +static void +test_lexing_unsupported_single_line_comment () +{ + lexer l (false); + const char *str + /* 0 1 2 3 4 . */ + /* 01234567890123456789012345678901234567890123456789. */ + = (" 1066 // Hello world\n"); + auto err = l.add_utf8 (strlen (str), str); + ASSERT_EQ (err, nullptr); + + /* Line 1. */ + { + const size_t line_offset = 0; + const int line_1 = 1; + + /* Expect token: "1066" in columns 4-7. */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER); + ASSERT_EQ (tok->u.integer_number, 1066); + ASSERT_RANGE_EQ (tok->range, + line_offset + 4, line_1, 4, + line_offset + 7, line_1, 7); + l.consume (); + } + + /* Expect error. 
*/ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_ERROR); + ASSERT_STREQ (tok->u.string, "unexpected character: '/'"); + ASSERT_RANGE_EQ (tok->range, + line_offset + 11, line_1, 11, + line_offset + 11, line_1, 11); + l.consume (); + } + } +} + +/* Verify that the JSON lexer complains about multiline comments + when comments are disabled. */ + +static void +test_lexing_unsupported_multiline_comment () +{ + lexer l (false); + const char *str + /* 0 1 2 3 4 . */ + /* 01234567890123456789012345678901234567890123456789. */ + = (" 1066 /* Hello world\n" + " continuation of comment\n" + " end of comment */ 42\n"); + auto err = l.add_utf8 (strlen (str), str); + ASSERT_EQ (err, nullptr); + + /* Line 1. */ + { + const size_t line_offset = 0; + const int line_1 = 1; + + /* Expect token: "1066" in line 1, columns 4-7. */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER); + ASSERT_EQ (tok->u.integer_number, 1066); + ASSERT_RANGE_EQ (tok->range, + line_offset + 4, line_1, 4, + line_offset + 7, line_1, 7); + l.consume (); + } + + /* Expect error. */ + { + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_ERROR); + ASSERT_STREQ (tok->u.string, "unexpected character: '/'"); + ASSERT_RANGE_EQ (tok->range, + line_offset + 11, line_1, 11, + line_offset + 11, line_1, 11); + l.consume (); + } + } +} + +/* Verify that the JSON lexer handles single-line comments + when comments are enabled. */ + +static void +test_lexing_supported_single_line_comment () +{ + lexer l (true); + const char *str + /* 0 1 2 3 4 . */ + /* 01234567890123456789012345678901234567890123456789. */ + = (" 1066 // Hello world\n" + " 42 // etc\n"); + auto err = l.add_utf8 (strlen (str), str); + ASSERT_EQ (err, nullptr); + + const size_t line_1_offset = 0; + const size_t line_2_offset = 26; + const size_t line_3_offset = line_2_offset + 17; + + /* Expect token: "1066" in line 1, columns 4-7. 
*/ + { + const int line_1 = 1; + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER); + ASSERT_EQ (tok->u.integer_number, 1066); + ASSERT_RANGE_EQ (tok->range, + line_1_offset + 4, line_1, 4, + line_1_offset + 7, line_1, 7); + l.consume (); + } + + /* Expect token: "42" in line 2, columns 5-6. */ + { + const int line_2 = 2; + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER); + ASSERT_EQ (tok->u.integer_number, 42); + ASSERT_RANGE_EQ (tok->range, + line_2_offset + 5, line_2, 5, + line_2_offset + 6, line_2, 6); + l.consume (); + } + + /* Expect EOF. */ + { + const int line_3 = 3; + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_EOF); + ASSERT_RANGE_EQ (tok->range, + line_3_offset + 0, line_3, 0, + line_3_offset + 0, line_3, 0); + l.consume (); + } +} + +/* Verify that the JSON lexer handles multiline comments + when comments are enabled. */ + +static void +test_lexing_supported_multiline_comment () +{ + lexer l (true); + const char *str + /* 0 1 2 3 4 . */ + /* 01234567890123456789012345678901234567890123456789. */ + = (" 1066 /* Hello world\n" + " continuation of comment\n" + " end of comment */ 42\n"); + auto err = l.add_utf8 (strlen (str), str); + ASSERT_EQ (err, nullptr); + + const size_t line_1_offset = 0; + const size_t line_2_offset = 26; + const size_t line_3_offset = line_2_offset + 25; + const size_t line_4_offset = line_3_offset + 23; + + /* Expect token: "1066" in line 1, columns 4-7. */ + { + const int line_1 = 1; + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER); + ASSERT_EQ (tok->u.integer_number, 1066); + ASSERT_RANGE_EQ (tok->range, + line_1_offset + 4, line_1, 4, + line_1_offset + 7, line_1, 7); + l.consume (); + } + + /* Expect token: "42" in line 3, columns 20-21. 
*/ + { + const int line_3 = 3; + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER); + ASSERT_EQ (tok->u.integer_number, 42); + ASSERT_RANGE_EQ (tok->range, + line_3_offset + 20, line_3, 20, + line_3_offset + 21, line_3, 21); + l.consume (); + } + + /* Expect EOF. */ + { + const int line_4 = 4; + const token *tok = l.peek (); + ASSERT_EQ (tok->id, TOK_EOF); + ASSERT_RANGE_EQ (tok->range, + line_4_offset + 0, line_4, 0, + line_4_offset + 0, line_4, 0); + l.consume (); + } +} + +/* Helper class for writing JSON parsing testcases. + Attempts to parse a string in ctor, and captures the result (either + a json::value or a json::error), and a location map. */ + +struct parser_testcase +{ +public: + parser_testcase (const char *utf8_string, bool allow_comments = false) + : m_loc_map (), + m_result (parse_utf8_string (utf8_string, allow_comments, &m_loc_map)) + { + } + + const json::value *get_value () const { return m_result.m_val.get (); } + const json::error *get_error () const { return m_result.m_err.get (); } + + const location_map::range * + get_range_for_value (const json::value *jv) const + { + return m_loc_map.get_range_for_value (jv); + } + +private: + /* Concrete implementation of location_map for use in + JSON parsing selftests. */ + class test_location_map : public location_map + { + public: + void record_range_for_value (json::value *jv, const range &r) final override + { + m_map.put (jv, r); + } + + range *get_range_for_value (const json::value *jv) const + { + return const_cast<hash_map<const json::value *, range> &> (m_map) + .get (jv); + } + + private: + hash_map<const json::value *, range> m_map; + }; + + test_location_map m_loc_map; + json::parser_result_t m_result; +}; + +/* Verify that parse_utf8_string works as expected. 
*/ + +static void +test_parse_string () +{ + const int line_1 = 1; + + { + parser_testcase tc ("\"foo\""); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (jv->get_kind (), JSON_STRING); + ASSERT_STREQ (as_a <const json::string *> (jv)->get_string (), "foo"); + ASSERT_PRINT_EQ (*jv, true, "\"foo\""); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 4, line_1, 4); + } + + { + const char *contains_quotes = "\"before \\\"quoted\\\" after\""; + parser_testcase tc (contains_quotes); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (jv->get_kind (), JSON_STRING); + ASSERT_STREQ (as_a <const json::string *> (jv)->get_string (), + "before \"quoted\" after"); + ASSERT_PRINT_EQ (*jv, true, contains_quotes); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 24, line_1, 24); + } + + /* Test of non-ASCII input. This string is the Japanese word "mojibake", + written as C octal-escaped UTF-8. */ + const char *mojibake = (/* Opening quote. */ + "\"" + /* U+6587 CJK UNIFIED IDEOGRAPH-6587 + UTF-8: 0xE6 0x96 0x87 + C octal escaped UTF-8: \346\226\207. */ + "\346\226\207" + /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57 + UTF-8: 0xE5 0xAD 0x97 + C octal escaped UTF-8: \345\255\227. */ + "\345\255\227" + /* U+5316 CJK UNIFIED IDEOGRAPH-5316 + UTF-8: 0xE5 0x8C 0x96 + C octal escaped UTF-8: \345\214\226. */ + "\345\214\226" + /* U+3051 HIRAGANA LETTER KE + UTF-8: 0xE3 0x81 0x91 + C octal escaped UTF-8: \343\201\221. */ + "\343\201\221" + /* Closing quote. */ + "\""); + { + parser_testcase tc (mojibake); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (jv->get_kind (), JSON_STRING); + /* Result of get_string should be UTF-8 encoded, without quotes. 
*/ + ASSERT_STREQ (as_a <const json::string *> (jv)->get_string (), + "\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221"); + /* Result of dump should be UTF-8 encoded, with quotes. */ + ASSERT_PRINT_EQ (*jv, false, mojibake); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 5, line_1, 5); + } + + /* Test of \u-escaped unicode. This is "mojibake" again, as above. */ + { + const char *escaped_unicode = "\"\\u6587\\u5b57\\u5316\\u3051\""; + parser_testcase tc (escaped_unicode); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (jv->get_kind (), JSON_STRING); + /* Result of get_string should be UTF-8 encoded, without quotes. */ + ASSERT_STREQ (as_a <const json::string *> (jv)->get_string (), + "\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221"); + /* Result of dump should be UTF-8 encoded, with quotes. */ + ASSERT_PRINT_EQ (*jv, false, mojibake); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 25, line_1, 25); + } +} + +/* Verify that we can parse various kinds of JSON numbers. */ + +static void +test_parse_number () +{ + const int line_1 = 1; + + { + parser_testcase tc ("42"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (jv->get_kind (), JSON_INTEGER); + ASSERT_EQ (as_a <const json::integer_number *> (jv)->get (), 42.0); + ASSERT_PRINT_EQ (*jv, true, "42"); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 1, line_1, 1); + } + + /* Negative number. 
*/ + { + parser_testcase tc ("-17"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (jv->get_kind (), JSON_INTEGER); + ASSERT_EQ (as_a<const json::integer_number *> (jv)->get (), -17.0); + ASSERT_PRINT_EQ (*jv, true, "-17"); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 2, line_1, 2); + } + + /* Decimal. */ + { + parser_testcase tc ("3.141"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (JSON_FLOAT, jv->get_kind ()); + ASSERT_EQ (3.141, ((const json::float_number *)jv)->get ()); + ASSERT_PRINT_EQ (*jv, true, "3.141"); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 4, line_1, 4); + } + + /* Exponents. */ + { + { + parser_testcase tc ("3.141e+0"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (jv->get_kind (), JSON_FLOAT); + ASSERT_EQ (as_a <const json::float_number *> (jv)->get (), 3.141); + ASSERT_PRINT_EQ (*jv, true, "3.141"); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 7, line_1, 7); + } + { + parser_testcase tc ("42e2"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (jv->get_kind (), JSON_INTEGER); + ASSERT_EQ (as_a <const json::integer_number *> (jv)->get (), 4200); + ASSERT_PRINT_EQ (*jv, true, "4200"); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 3, line_1, 3); + } + { + parser_testcase tc ("42e-1"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (jv->get_kind (), JSON_FLOAT); + ASSERT_EQ (as_a <const json::float_number *> (jv)->get (), 4.2); + ASSERT_PRINT_EQ (*jv, true, "4.2"); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + 
ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 4, line_1, 4); + } + } +} + +/* Verify that JSON array parsing works. */ + +static void +test_parse_array () +{ + const int line_1 = 1; + + parser_testcase tc ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_EQ (jv->get_kind (), JSON_ARRAY); + const json::array *arr = as_a <const json::array *> (jv); + ASSERT_EQ (arr->length (), 10); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 29, line_1, 29); + for (int i = 0; i < 10; i++) + { + json::value *element = arr->get (i); + ASSERT_EQ (element->get_kind (), JSON_INTEGER); + ASSERT_EQ (as_a <json::integer_number *> (element)->get (), i); + range = tc.get_range_for_value (element); + ASSERT_TRUE (range); + const int offset = 1 + (i * 3); + ASSERT_RANGE_EQ (*range, + offset, line_1, offset, + offset, line_1, offset); + } + ASSERT_PRINT_EQ (*jv, false, "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]"); +} + +/* Verify that JSON object parsing works. */ + +static void +test_parse_object () +{ + const int line_1 = 1; + std::unique_ptr<error> err; + /* 0 1 2 3 . */ + /* 01 2345 678 9012 345 6789 0123456789012. 
*/ + parser_testcase tc ("{\"foo\": \"bar\", \"baz\": [42, null]}"); + + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_NE (jv, nullptr); + ASSERT_EQ (jv->get_kind (), JSON_OBJECT); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 32, line_1, 32); + const json::object *jo = static_cast <const json::object *> (jv); + + json::value *foo_value = jo->get ("foo"); + ASSERT_NE (foo_value, nullptr); + ASSERT_EQ (foo_value->get_kind (), JSON_STRING); + ASSERT_STREQ (as_a <json::string *> (foo_value)->get_string (), "bar"); + range = tc.get_range_for_value (foo_value); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 8, line_1, 8, + 12, line_1, 12); + + json::value *baz_value = jo->get ("baz"); + ASSERT_NE (baz_value, nullptr); + ASSERT_EQ (baz_value->get_kind (), JSON_ARRAY); + range = tc.get_range_for_value (baz_value); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 22, line_1, 22, + 31, line_1, 31); + + json::array *baz_array = as_a <json::array *> (baz_value); + ASSERT_EQ (baz_array->length (), 2); + + json::value *element0 = baz_array->get (0); + ASSERT_EQ (as_a <json::integer_number *> (element0)->get (), 42); + range = tc.get_range_for_value (element0); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 23, line_1, 23, + 24, line_1, 24); + + json::value *element1 = baz_array->get (1); + ASSERT_EQ (element1->get_kind (), JSON_NULL); + range = tc.get_range_for_value (element1); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 27, line_1, 27, + 30, line_1, 30); +} + +/* Verify that the JSON literals "true", "false" and "null" are parsed + correctly. 
*/ + +static void +test_parse_literals () +{ + const int line_1 = 1; + { + parser_testcase tc ("true"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_NE (jv, nullptr); + ASSERT_EQ (jv->get_kind (), JSON_TRUE); + ASSERT_PRINT_EQ (*jv, false, "true"); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 3, line_1, 3); + } + + { + parser_testcase tc ("false"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_NE (jv, nullptr); + ASSERT_EQ (jv->get_kind (), JSON_FALSE); + ASSERT_PRINT_EQ (*jv, false, "false"); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 4, line_1, 4); + } + + { + parser_testcase tc ("null"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_NE (jv, nullptr); + ASSERT_EQ (jv->get_kind (), JSON_NULL); + ASSERT_PRINT_EQ (*jv, false, "null"); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 3, line_1, 3); + } +} + +/* Verify that we can parse a simple JSON-RPC request. */ + +static void +test_parse_jsonrpc () +{ + std::unique_ptr<error> err; + const char *request + /* 0 1 2 3 4. */ + /* 01 23456789 012 3456 789 0123456 789 012345678 90. */ + = ("{\"jsonrpc\": \"2.0\", \"method\": \"subtract\",\n" + /* 0 1 2 3 4. */ + /* 0 1234567 8901234567890 1234 56789012345678 90. */ + " \"params\": [42, 23], \"id\": 1}"); + const int line_1 = 1; + const int line_2 = 2; + const size_t line_2_offset = 41; + parser_testcase tc (request); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_NE (jv, nullptr); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + line_2_offset + 28, line_2, 28); +} + +/* Verify that we can parse an empty JSON object. 
*/ + +static void +test_parse_empty_object () +{ + const int line_1 = 1; + std::unique_ptr<error> err; + parser_testcase tc ("{}"); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_NE (jv, nullptr); + ASSERT_EQ (jv->get_kind (), JSON_OBJECT); + ASSERT_PRINT_EQ (*jv, true, "{}"); + auto range = tc.get_range_for_value (jv); + ASSERT_TRUE (range); + ASSERT_RANGE_EQ (*range, + 0, line_1, 0, + 1, line_1, 1); +} + +/* Verify that comment-parsing can be enabled or disabled. */ + +static void +test_parsing_comments () +{ + const char *str = ("// foo\n" + "/*...\n" + "...*/ 42 // bar\n" + "/* etc */\n"); + + /* Parsing with comment support disabled. */ + { + parser_testcase tc (str); + ASSERT_NE (tc.get_error (), nullptr); + ASSERT_STREQ (tc.get_error ()->get_msg (), + "invalid JSON token: unexpected character: '/'"); + ASSERT_EQ (tc.get_value (), nullptr); + } + + /* Parsing with comment support enabled. */ + { + parser_testcase tc (str, true); + ASSERT_EQ (tc.get_error (), nullptr); + const json::value *jv = tc.get_value (); + ASSERT_NE (jv, nullptr); + ASSERT_EQ (jv->get_kind (), JSON_INTEGER); + ASSERT_EQ (((const json::integer_number *)jv)->get (), 42); + } +} + +/* Verify that parsing an empty input string is reported as an error. */ + +static void +test_error_empty_string () +{ + const int line_1 = 1; + parser_testcase tc (""); + ASSERT_ERR_EQ (tc.get_error (), + 0, line_1, 0, + 0, line_1, 0, + "expected a JSON value but got EOF"); + ASSERT_EQ (tc.get_value (), nullptr); +} + +/* Verify that JSON parsing gracefully handles an invalid token. */ + +static void +test_error_bad_token () +{ + const int line_1 = 1; + parser_testcase tc (" not valid "); + ASSERT_ERR_EQ (tc.get_error (), + 2, line_1, 2, + 2, line_1, 2, + "invalid JSON token: unexpected character: 'n'"); + ASSERT_EQ (tc.get_value (), nullptr); +} + +/* Verify that JSON parsing gracefully handles a missing comma + within an object. 
*/ + +static void +test_error_object_with_missing_comma () +{ + const int line_1 = 1; + /* 0 1 2. */ + /* 01 2345 6789012 3456 7890. */ + const char *json = "{\"foo\" : 42 \"bar\""; + parser_testcase tc (json); + ASSERT_ERR_EQ (tc.get_error (), + 12, line_1, 12, + 16, line_1, 16, + "expected ',' or '}'; got string"); + ASSERT_EQ (tc.get_value (), nullptr); +} + +/* Verify that JSON parsing gracefully handles a missing comma + within an array. */ + +static void +test_error_array_with_missing_comma () +{ + const int line_1 = 1; + /* 01234567. */ + const char *json = "[0, 1 42]"; + parser_testcase tc (json); + ASSERT_ERR_EQ (tc.get_error (), + 6, line_1, 6, + 7, line_1, 7, + "expected ',' or ']'; got number"); + ASSERT_EQ (tc.get_value (), nullptr); +} + +/* Run all of the selftests within this file. */ + +void +json_parser_cc_tests () +{ + test_lexer (); + test_lexing_unsupported_single_line_comment (); + test_lexing_unsupported_multiline_comment (); + test_lexing_supported_single_line_comment (); + test_lexing_supported_multiline_comment (); + test_parse_string (); + test_parse_number (); + test_parse_array (); + test_parse_object (); + test_parse_literals (); + test_parse_jsonrpc (); + test_parse_empty_object (); + test_parsing_comments (); + test_error_empty_string (); + test_error_bad_token (); + test_error_object_with_missing_comma (); + test_error_array_with_missing_comma (); +} + +} // namespace selftest + +#endif /* #if CHECKING_P */ diff --git a/gcc/json-parsing.h b/gcc/json-parsing.h new file mode 100644 index 000000000000..3dbbf22402e4 --- /dev/null +++ b/gcc/json-parsing.h @@ -0,0 +1,113 @@ +/* JSON parsing + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by David Malcolm <dmalcolm@redhat.com>. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_JSON_PARSING_H +#define GCC_JSON_PARSING_H + +#include "json.h" + +namespace json +{ + +/* Declarations for parsing JSON to a json::value * tree. */ + +/* Abstract base class for recording what the locations of JSON values + were as they were parsed. */ + +class location_map +{ +public: + /* A point within the JSON input file. */ + struct point + { + size_t m_unichar_idx; /* zero-based. */ + int m_line; /* one-based. */ + int m_column; /* zero-based unichar count. */ + }; + + /* A range of points within the JSON input file. + Both endpoints are part of the range. */ + struct range + { + point m_start; + point m_end; + }; + + virtual ~location_map () {} + virtual void record_range_for_value (json::value *jv, const range &r) = 0; + virtual void on_finished_parsing () {} +}; + +/* Class for recording an error within a JSON file; owns and frees MSG. */ + +class error +{ +public: + error (const location_map::range &r, char *msg) + : m_range (r), m_msg (msg) + { + } + ~error () + { + free (m_msg); + } + + const location_map::range &get_range () const { return m_range; } + const char *get_msg () const { return m_msg; } + +private: + location_map::range m_range; + char *m_msg; +}; + +/* Class for the result of an operation: either a value or an error + (or both null for the case of "successful nullptr"). + The types must be default-constructible. 
*/ + +template <typename ValueType, typename ErrorType> +struct result +{ + result (ValueType val) : m_val (std::move (val)), m_err () {} + result (ErrorType err) : m_val (), m_err (std::move (err)) {} + + ValueType m_val; + ErrorType m_err; +}; + +/* Typedef for the result of parsing JSON: ownership of either a + json::value * or of a json::error *. */ +typedef result<std::unique_ptr<value>, + std::unique_ptr<error>> parser_result_t; + +/* Functions for parsing JSON buffers. */ + +extern parser_result_t +parse_utf8_string (size_t length, + const char *utf8_buf, + bool allow_comments, + location_map *out_loc_map); +extern parser_result_t +parse_utf8_string (const char *utf8, + bool allow_comments, + location_map *out_loc_map); + +} // namespace json + +#endif /* GCC_JSON_PARSING_H */ diff --git a/gcc/json.cc b/gcc/json.cc index 275ef486faf1..4e4d43a7d100 100644 --- a/gcc/json.cc +++ b/gcc/json.cc @@ -383,7 +383,7 @@ namespace selftest { /* Verify that JV->print () prints EXPECTED_JSON. */ -static void +void assert_print_eq (const location &loc, const json::value &jv, bool formatted, diff --git a/gcc/json.h b/gcc/json.h index 21f71fe1c4ab..a761384af775 100644 --- a/gcc/json.h +++ b/gcc/json.h @@ -36,8 +36,8 @@ along with GCC; see the file COPYING3. If not see and http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf and https://tools.ietf.org/html/rfc7159 - Supports creating a DOM-like tree of json::value *, and then dumping - json::value * to text. */ + Supports parsing text into a DOM-like tree of json::value *, directly + creating such trees, and dumping json::value * to text. */ /* TODO: `libcpp/mkdeps.cc` wants JSON writing support for p1689r5 output; extract this code and move to libiberty. 
*/ @@ -175,6 +175,13 @@ class array : public value size_t size () const { return m_elements.length (); } value *operator[] (size_t i) const { return m_elements[i]; } + value **begin () { return m_elements.begin (); } + value **end () { return m_elements.end (); } + const value * const *begin () const { return m_elements.begin (); } + const value * const *end () const { return m_elements.end (); } + size_t length () const { return m_elements.length (); } + value *get (size_t idx) const { return m_elements[idx]; } + private: auto_vec<value *> m_elements; }; @@ -252,4 +259,115 @@ class literal : public value } // namespace json +template <> +template <> +inline bool +is_a_helper <json::value *>::test (json::value *) +{ + return true; +} + +template <> +template <> +inline bool +is_a_helper <const json::value *>::test (const json::value *) +{ + return true; +} + +template <> +template <> +inline bool +is_a_helper <json::object *>::test (json::value *jv) +{ + return jv->get_kind () == json::JSON_OBJECT; +} + +template <> +template <> +inline bool +is_a_helper <const json::object *>::test (const json::value *jv) +{ + return jv->get_kind () == json::JSON_OBJECT; +} + +template <> +template <> +inline bool +is_a_helper <json::array *>::test (json::value *jv) +{ + return jv->get_kind () == json::JSON_ARRAY; +} + +template <> +template <> +inline bool +is_a_helper <const json::array *>::test (const json::value *jv) +{ + return jv->get_kind () == json::JSON_ARRAY; +} + +template <> +template <> +inline bool +is_a_helper <json::float_number *>::test (json::value *jv) +{ + return jv->get_kind () == json::JSON_FLOAT; +} + +template <> +template <> +inline bool +is_a_helper <const json::float_number *>::test (const json::value *jv) +{ + return jv->get_kind () == json::JSON_FLOAT; +} + +template <> +template <> +inline bool +is_a_helper <json::integer_number *>::test (json::value *jv) +{ + return jv->get_kind () == json::JSON_INTEGER; +} + +template <> +template <> +inline bool 
+is_a_helper <const json::integer_number *>::test (const json::value *jv) +{ + return jv->get_kind () == json::JSON_INTEGER; +} + +template <> +template <> +inline bool +is_a_helper <json::string *>::test (json::value *jv) +{ + return jv->get_kind () == json::JSON_STRING; +} + +template <> +template <> +inline bool +is_a_helper <const json::string *>::test (const json::value *jv) +{ + return jv->get_kind () == json::JSON_STRING; +} + +#if CHECKING_P + +namespace selftest { + +class location; + +extern void assert_print_eq (const location &loc, + const json::value &jv, + bool formatted, + const char *expected_json); + +} // namespace selftest + +#endif /* #if CHECKING_P */ + #endif /* GCC_JSON_H */ diff --git a/gcc/selftest-run-tests.cc b/gcc/selftest-run-tests.cc index d6c88f864ba7..821f3fb7b25b 100644 --- a/gcc/selftest-run-tests.cc +++ b/gcc/selftest-run-tests.cc @@ -75,6 +75,7 @@ selftest::run_tests () opt_suggestions_cc_tests (); opts_cc_tests (); json_cc_tests (); + json_parser_cc_tests (); cgraph_cc_tests (); optinfo_emit_json_cc_tests (); ordered_hash_map_tests_cc_tests (); diff --git a/gcc/selftest.h b/gcc/selftest.h index 5afc9399c619..7a50d95df6b0 100644 --- a/gcc/selftest.h +++ b/gcc/selftest.h @@ -239,6 +239,7 @@ extern void hash_map_tests_cc_tests (); extern void hash_set_tests_cc_tests (); extern void input_cc_tests (); extern void json_cc_tests (); +extern void json_parser_cc_tests (); extern void optinfo_emit_json_cc_tests (); extern void opts_cc_tests (); extern void ordered_hash_map_tests_cc_tests ();

[6/7] json: add json parsing support

Commit Message

Patch