diff mbox

[v5,21/28] qapi: Require valid names

Message ID 1427227433-5030-22-git-send-email-eblake@redhat.com
State New
Headers show

Commit Message

Eric Blake March 24, 2015, 8:03 p.m. UTC
Previous commits demonstrated that the generator overlooked various
bad naming situations:
- types, commands, and events need a valid name
- union and alternate branches cannot be marked optional

The set of valid names includes [a-zA-Z0-9._-] (where '.' is
useful only in downstream extensions).

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 scripts/qapi.py                                    | 57 ++++++++++++++++------
 tests/qapi-schema/bad-ident.err                    |  1 +
 tests/qapi-schema/bad-ident.exit                   |  2 +-
 tests/qapi-schema/bad-ident.json                   |  2 +-
 tests/qapi-schema/bad-ident.out                    |  3 --
 tests/qapi-schema/flat-union-bad-discriminator.err |  2 +-
 .../flat-union-optional-discriminator.err          |  1 +
 .../flat-union-optional-discriminator.exit         |  2 +-
 .../flat-union-optional-discriminator.json         |  2 +-
 .../flat-union-optional-discriminator.out          |  5 --
 tests/qapi-schema/union-optional-branch.err        |  1 +
 tests/qapi-schema/union-optional-branch.exit       |  2 +-
 tests/qapi-schema/union-optional-branch.json       |  2 +-
 tests/qapi-schema/union-optional-branch.out        |  3 --
 14 files changed, 53 insertions(+), 32 deletions(-)

Comments

Markus Armbruster March 27, 2015, 8:48 a.m. UTC | #1
Eric Blake <eblake@redhat.com> writes:

> Previous commits demonstrated that the generator overlooked various
> bad naming situations:
> - types, commands, and events need a valid name
> - union and alternate branches cannot be marked optional
>
> The set of valid names includes [a-zA-Z0-9._-] (where '.' is
> useful only in downstream extensions).
>
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>  scripts/qapi.py                                    | 57 ++++++++++++++++------
>  tests/qapi-schema/bad-ident.err                    |  1 +
>  tests/qapi-schema/bad-ident.exit                   |  2 +-
>  tests/qapi-schema/bad-ident.json                   |  2 +-
>  tests/qapi-schema/bad-ident.out                    |  3 --
>  tests/qapi-schema/flat-union-bad-discriminator.err |  2 +-
>  .../flat-union-optional-discriminator.err          |  1 +
>  .../flat-union-optional-discriminator.exit         |  2 +-
>  .../flat-union-optional-discriminator.json         |  2 +-
>  .../flat-union-optional-discriminator.out          |  5 --
>  tests/qapi-schema/union-optional-branch.err        |  1 +
>  tests/qapi-schema/union-optional-branch.exit       |  2 +-
>  tests/qapi-schema/union-optional-branch.json       |  2 +-
>  tests/qapi-schema/union-optional-branch.out        |  3 --
>  14 files changed, 53 insertions(+), 32 deletions(-)
>
> diff --git a/scripts/qapi.py b/scripts/qapi.py
> index c42683b..ed5385a 100644
> --- a/scripts/qapi.py
> +++ b/scripts/qapi.py
> @@ -15,6 +15,7 @@ import re
>  from ordereddict import OrderedDict
>  import os
>  import sys
> +import string
>
>  builtin_types = {
>      'str':      'QTYPE_QSTRING',
> @@ -276,8 +277,27 @@ def discriminator_find_enum_define(expr):
>
>      return find_enum(discriminator_type)
>
> +valid_characters = set(string.ascii_letters + string.digits + '.' + '-' + '_')

strings.ascii_letters depends on the locale...

> +def check_name(expr_info, source, name, allow_optional = False):
> +    membername = name
> +
> +    if not isinstance(name, str):
> +        raise QAPIExprError(expr_info,
> +                            "%s requires a string name" % source)
> +    if name == '**':
> +        return

Doesn't this permit '**' anywhere, not just as pseudo-type in command
arguments and results?

> +    if name.startswith('*'):
> +        membername = name[1:]
> +        if not allow_optional:
> +            raise QAPIExprError(expr_info,
> +                                "%s does not allow optional name '%s'"
> +                                % (source, name))
> +    if not set(membername) <= valid_characters:

... so this check would break if we called locale.setlocale() in this
program.  While I don't think we need to worry about it, I think you
could just as well use something like

    valid_name = re.compile(r"^[A-Za-z0-9-._]+$")

    if not valid_name.match(membername):

> +        raise QAPIExprError(expr_info,
> +                            "%s uses invalid name '%s'" % (source, name))
> +
>  def check_type(expr_info, source, data, allow_array = False,
> -               allowed_metas = [], allow_dict = True):
> +               allowed_metas = [], allow_dict = True, allow_optional = False):
>      global all_names
>
>      if data is None:
> @@ -317,21 +337,23 @@ def check_type(expr_info, source, data, allow_array = False,
>          raise QAPIExprError(expr_info,
>                              "%s should be a type name" % source)
>      for (key, value) in data.items():
> +        check_name(expr_info, "Member of %s" % source, key,
> +                   allow_optional=allow_optional)
>          check_type(expr_info, "Member '%s' of %s" % (key, source), value,
>                     allow_array=True,
>                     allowed_metas=['built-in', 'union', 'alternate', 'struct',
>                                    'enum'],
> -                   allow_dict=True)
> +                   allow_dict=True, allow_optional=True)
>
>  def check_command(expr, expr_info):
>      name = expr['command']
>      check_type(expr_info, "'data' for command '%s'" % name,
>                 expr.get('data'),
> -               allowed_metas=['union', 'struct'])
> +               allowed_metas=['union', 'struct'], allow_optional=True)
>      check_type(expr_info, "'returns' for command '%s'" % name,
>                 expr.get('returns'), allow_array=True,
>                 allowed_metas=['built-in', 'union', 'alternate', 'struct',
> -                              'enum'])
> +                              'enum'], allow_optional=True)
>
>  def check_event(expr, expr_info):
>      global events
> @@ -345,7 +367,8 @@ def check_event(expr, expr_info):
>                              % name)
>      events.append(name)
>      check_type(expr_info, "'data' for event '%s'" % name,
> -               expr.get('data'), allowed_metas=['union', 'struct'])
> +               expr.get('data'), allowed_metas=['union', 'struct'],
> +               allow_optional=True)
>      if params:
>          for argname, argentry, optional, structured in parse_args(params):
>              if structured:
> @@ -385,12 +408,10 @@ def check_union(expr, expr_info):
>                                  "Base '%s' is not a valid base type"
>                                  % base)
>
> -        # The value of member 'discriminator' must name a member of the
> -        # base type.
> -        if not isinstance(discriminator, str):
> -            raise QAPIExprError(expr_info,
> -                                "Flat union '%s' must have a string "
> -                                "discriminator field" % name)
> +        # The value of member 'discriminator' must name a non-optional
> +        # member of the base type.
> +        check_name(expr_info, "Discriminator of flat union '%s'" % name,
> +                   discriminator)
>          discriminator_type = base_fields.get(discriminator)
>          if not discriminator_type:
>              raise QAPIExprError(expr_info,

What happens when I try 'discriminator': '**'?

> @@ -406,6 +427,8 @@ def check_union(expr, expr_info):
>
>      # Check every branch
>      for (key, value) in members.items():
> +        check_name(expr_info, "Member of union '%s'" % name, key)
> +
>          # Each value must name a known type
>          check_type(expr_info, "Member '%s' of union '%s'" % (key, name),
>                     value, allow_array=True,
> @@ -439,6 +462,8 @@ def check_alternate(expr, expr_info):
>
>      # Check every branch
>      for (key, value) in members.items():
> +        check_name(expr_info, "Member of alternate '%s'" % name, key)
> +
>          # Check for conflicts in the generated enum
>          c_key = _generate_enum_string(key)
>          if c_key in values:
> @@ -485,7 +510,8 @@ def check_struct(expr, expr_info):
>      name = expr['type']
>      members = expr['data']
>
> -    check_type(expr_info, "'data' for type '%s'" % name, members)
> +    check_type(expr_info, "'data' for type '%s'" % name, members,
> +               allow_optional=True)
>      check_type(expr_info, "'base' for type '%s'" % name, expr.get('base'),
>                 allowed_metas=['struct'], allow_dict=False)
>
> @@ -676,8 +702,11 @@ def type_name(name):
>          return c_list_type(name[0])
>      return name
>
> -def add_name(name, info, meta, implicit = False):
> +def add_name(name, info, meta, implicit = False, source = None):
>      global all_names
> +    if not source:
> +        source = "'%s'" % meta
> +    check_name(info, source, name)
>      if name in all_names:
>          raise QAPIExprError(info,
>                              "%s '%s' is already defined"
> @@ -691,7 +720,7 @@ def add_name(name, info, meta, implicit = False):
>  def add_struct(definition, info):
>      global struct_types
>      name = definition['type']
> -    add_name(name, info, 'struct')
> +    add_name(name, info, 'struct', source="'type'")
>      struct_types.append(definition)
>
>  def find_struct(name):
[...]
Markus Armbruster March 27, 2015, 5:14 p.m. UTC | #2
Eric Blake <eblake@redhat.com> writes:

> Previous commits demonstrated that the generator overlooked various
> bad naming situations:
> - types, commands, and events need a valid name
> - union and alternate branches cannot be marked optional
>
> The set of valid names includes [a-zA-Z0-9._-] (where '.' is
> useful only in downstream extensions).
>
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>  scripts/qapi.py                                    | 57 ++++++++++++++++------
>  tests/qapi-schema/bad-ident.err                    |  1 +
>  tests/qapi-schema/bad-ident.exit                   |  2 +-
>  tests/qapi-schema/bad-ident.json                   |  2 +-
>  tests/qapi-schema/bad-ident.out                    |  3 --
>  tests/qapi-schema/flat-union-bad-discriminator.err |  2 +-
>  .../flat-union-optional-discriminator.err          |  1 +
>  .../flat-union-optional-discriminator.exit         |  2 +-
>  .../flat-union-optional-discriminator.json         |  2 +-
>  .../flat-union-optional-discriminator.out          |  5 --
>  tests/qapi-schema/union-optional-branch.err        |  1 +
>  tests/qapi-schema/union-optional-branch.exit       |  2 +-
>  tests/qapi-schema/union-optional-branch.json       |  2 +-
>  tests/qapi-schema/union-optional-branch.out        |  3 --
>  14 files changed, 53 insertions(+), 32 deletions(-)
>
> diff --git a/scripts/qapi.py b/scripts/qapi.py
> index c42683b..ed5385a 100644
> --- a/scripts/qapi.py
> +++ b/scripts/qapi.py
> @@ -15,6 +15,7 @@ import re
>  from ordereddict import OrderedDict
>  import os
>  import sys
> +import string
>
>  builtin_types = {
>      'str':      'QTYPE_QSTRING',
> @@ -276,8 +277,27 @@ def discriminator_find_enum_define(expr):
>
>      return find_enum(discriminator_type)
>
> +valid_characters = set(string.ascii_letters + string.digits + '.' + '-' + '_')
> +def check_name(expr_info, source, name, allow_optional = False):
> +    membername = name
> +
> +    if not isinstance(name, str):
> +        raise QAPIExprError(expr_info,
> +                            "%s requires a string name" % source)
> +    if name == '**':
> +        return

I'm afraid this conditional is superfluous or wrong.  Our schemata don't
trigger it.

As far as I know, '**' may occur as pseudo-type in a command's 'data' or
'returns', and nowhere else.

Example 1 (qom-get):
  'returns': '**'

Example 2 (qom-set):
  'data': { 'path': 'str', 'property': 'str', 'value': '**' },

Example 3 (hypothetical)
  'returns': { 'frob': '**' }

Both 'data' and 'returns' are checked by check_type().

Example 1 takes the early exit on data = '**' there.

Example 2 goes through this loop

    for (key, value) in data.items():
        check_name(expr_info, "Member of %s" % source, key,
                   allow_optional=allow_optional)
        check_type(expr_info, "Member '%s' of %s" % (key, source), value,
                   allow_array=True,
                   allowed_metas=['built-in', 'union', 'alternate', 'struct',
                                  'enum'],
                   allow_dict=True, allow_optional=True)

The '**' is fed to check_type(), *not* to check_name().  check_type()
takes the same early exit.

[...]
Eric Blake March 27, 2015, 8:15 p.m. UTC | #3
On 03/27/2015 02:48 AM, Markus Armbruster wrote:
> Eric Blake <eblake@redhat.com> writes:
> 
>> Previous commits demonstrated that the generator overlooked various
>> bad naming situations:
>> - types, commands, and events need a valid name
>> - union and alternate branches cannot be marked optional
>>
>> The set of valid names includes [a-zA-Z0-9._-] (where '.' is
>> useful only in downstream extensions).
>>

>> +valid_characters = set(string.ascii_letters + string.digits + '.' + '-' + '_')
> 
> strings.ascii_letters depends on the locale...

https://docs.python.org/2/library/string.html

string.ascii_letters

    The concatenation of the ascii_lowercase and ascii_uppercase
constants described below. This value is not locale-dependent.

You are thinking of string.letters, which IS locale-dependent.  I
intentionally used ascii_letters.


> 
>> +def check_name(expr_info, source, name, allow_optional = False):
>> +    membername = name
>> +
>> +    if not isinstance(name, str):
>> +        raise QAPIExprError(expr_info,
>> +                            "%s requires a string name" % source)
>> +    if name == '**':
>> +        return
> 
> Doesn't this permit '**' anywhere, not just as pseudo-type in command
> arguments and results?

Yes, on the grounds that check_type then filters it appropriately.  But
worthy of a comment (probably both in the commit message AND in the code
base).  Grounds for a v6 respin.

> 
>> +    if name.startswith('*'):
>> +        membername = name[1:]
>> +        if not allow_optional:
>> +            raise QAPIExprError(expr_info,
>> +                                "%s does not allow optional name '%s'"
>> +                                % (source, name))
>> +    if not set(membername) <= valid_characters:
> 
> ... so this check would break if we called locale.setlocale() in this
> program.  While I don't think we need to worry about it, I think you
> could just as well use something like
> 
>     valid_name = re.compile(r"^[A-Za-z0-9-._]+$")
> 
>     if not valid_name.match(membername):

regex is slightly slower than string matching _if the regex is
precompiled_, and MUCH slower than string matching if the regex is
compiled every time.  In turn, string matching is slower than
open-coding things, but has the benefit of being more compact and
maintainable (open-coded loops are the worst on that front). Here's
where I got my inspiration:

https://stackoverflow.com/questions/1323364/in-python-how-to-check-if-a-string-only-contains-certain-characters

But I may just go with the regex after all (I don't know how efficient
python is about reusing a regex when a function is called multiple
times, rather than recompiling the regex every time.  Personal side
note: back in 2009 or so, I was able to make 'm4' significantly faster
in the context of 'autoconf' when I taught it to cache the compilation
of the 8 most-recently-encountered regex, rather than recompiling every
time; and then made 'autoconf' even faster when I taught it to do
actions that didn't require regex use from 'm4' in the first place.)

The nice thing, though, is that I factored things so that the
implementation of this one function can change without having to hunt
down all call-sites, if I keep the contract the same.


>>          discriminator_type = base_fields.get(discriminator)
>>          if not discriminator_type:
>>              raise QAPIExprError(expr_info,
> 
> What happens when I try 'discriminator': '**'?

No clue.  Good thing for me to add somewhere in my series.  However, I
did manage to have this series at least think about a base type with
'*switch':'Enum', then use 'discriminator':'*switch', which got past the
generator (who knows what the C code would have done if have_switch was
false?), so I plugged that hole; but in the process of testing it, I
noticed that '*switch':'Enum' paired with 'discriminator':'switch'
complained that 'switch' was not a member of the base class (which is a
lie; it is present in the base class, but as an optional member).  Proof
that the generator is a bunch of hacks strung together :)
Eric Blake March 27, 2015, 8:17 p.m. UTC | #4
On 03/27/2015 11:14 AM, Markus Armbruster wrote:
> Eric Blake <eblake@redhat.com> writes:
> 
>> Previous commits demonstrated that the generator overlooked various
>> bad naming situations:
>> - types, commands, and events need a valid name
>> - union and alternate branches cannot be marked optional
>>
>> The set of valid names includes [a-zA-Z0-9._-] (where '.' is
>> useful only in downstream extensions).
>>

>>
>> +valid_characters = set(string.ascii_letters + string.digits + '.' + '-' + '_')
>> +def check_name(expr_info, source, name, allow_optional = False):
>> +    membername = name
>> +
>> +    if not isinstance(name, str):
>> +        raise QAPIExprError(expr_info,
>> +                            "%s requires a string name" % source)
>> +    if name == '**':
>> +        return
> 
> I'm afraid this conditional is superfluous or wrong.  Our schemata don't
> trigger it.

Hmm, you're right.  Given a 'name':'type' pair in a dictionary,
check_name is used on the 'name' portion; but we only use 'name':'**'
(and NOT '**':'type') in the schemata.  I'll drop it in the respin.
Markus Armbruster March 29, 2015, 9:06 a.m. UTC | #5
Eric Blake <eblake@redhat.com> writes:

[...]
> +valid_characters = set(string.ascii_letters + string.digits + '.' + '-' + '_')
> +def check_name(expr_info, source, name, allow_optional = False):
> +    membername = name
> +
> +    if not isinstance(name, str):
> +        raise QAPIExprError(expr_info,
> +                            "%s requires a string name" % source)
> +    if name == '**':
> +        return
> +    if name.startswith('*'):
> +        membername = name[1:]
> +        if not allow_optional:
> +            raise QAPIExprError(expr_info,
> +                                "%s does not allow optional name '%s'"
> +                                % (source, name))
> +    if not set(membername) <= valid_characters:
> +        raise QAPIExprError(expr_info,
> +                            "%s uses invalid name '%s'" % (source, name))
> +

This accepts names starting with a digit.  Sure we generate valid C
identifiers for such beauties?

qapi-code-gen.txt:

    Downstream vendors may add extensions; such extensions should begin
    with a prefix matching "__RFQDN_" (for the reverse-fully-qualified-
    domain-name of the vendor), even if the rest of the command or field
    name uses dash (example: __com.redhat_drive-mirror).  Other than the
    dots used in RFQDN of a downstream extension, all command, type, and
    field names should begin with a letter, and contain only ASCII
    letters, numbers, dash, and underscore.

One, I think "all command, type, and field names" is too narrow, what
about event names, or enumeration value names?  Suggest say just "all
names".

Two, "letters, digits, dash, and underscore", please.

Three, I think check_name() should enforce "starts with letter or '_'".

[...]
Markus Armbruster March 29, 2015, 10:17 a.m. UTC | #6
Eric Blake <eblake@redhat.com> writes:

> On 03/27/2015 02:48 AM, Markus Armbruster wrote:
>> Eric Blake <eblake@redhat.com> writes:
>> 
>>> Previous commits demonstrated that the generator overlooked various
>>> bad naming situations:
>>> - types, commands, and events need a valid name
>>> - union and alternate branches cannot be marked optional
>>>
>>> The set of valid names includes [a-zA-Z0-9._-] (where '.' is
>>> useful only in downstream extensions).
>>>
>
>>> +valid_characters = set(string.ascii_letters + string.digits + '.'
>>> + '-' + '_')
>> 
>> strings.ascii_letters depends on the locale...
>
> https://docs.python.org/2/library/string.html
>
> string.ascii_letters
>
>     The concatenation of the ascii_lowercase and ascii_uppercase
> constants described below. This value is not locale-dependent.
>
> You are thinking of string.letters, which IS locale-dependent.  I
> intentionally used ascii_letters.

You're right, I misread the docs.

>>> +def check_name(expr_info, source, name, allow_optional = False):
>>> +    membername = name
>>> +
>>> +    if not isinstance(name, str):
>>> +        raise QAPIExprError(expr_info,
>>> +                            "%s requires a string name" % source)
>>> +    if name == '**':
>>> +        return
>> 
>> Doesn't this permit '**' anywhere, not just as pseudo-type in command
>> arguments and results?
>
> Yes, on the grounds that check_type then filters it appropriately.  But
> worthy of a comment (probably both in the commit message AND in the code
> base).  Grounds for a v6 respin.
>
>> 
>>> +    if name.startswith('*'):
>>> +        membername = name[1:]
>>> +        if not allow_optional:
>>> +            raise QAPIExprError(expr_info,
>>> +                                "%s does not allow optional name '%s'"
>>> +                                % (source, name))
>>> +    if not set(membername) <= valid_characters:
>> 
>> ... so this check would break if we called locale.setlocale() in this
>> program.  While I don't think we need to worry about it, I think you
>> could just as well use something like
>> 
>>     valid_name = re.compile(r"^[A-Za-z0-9-._]+$")
>> 
>>     if not valid_name.match(membername):
>
> regex is slightly slower than string matching _if the regex is
> precompiled_, and MUCH slower than string matching if the regex is
> compiled every time.  In turn, string matching is slower than
> open-coding things, but has the benefit of being more compact and
> maintainable (open-coded loops are the worst on that front). Here's
> where I got my inspiration:
>
> https://stackoverflow.com/questions/1323364/in-python-how-to-check-if-a-string-only-contains-certain-characters
>
> But I may just go with the regex after all (I don't know how efficient
> python is about reusing a regex when a function is called multiple
> times, rather than recompiling the regex every time.  Personal side
> note: back in 2009 or so, I was able to make 'm4' significantly faster
> in the context of 'autoconf' when I taught it to cache the compilation
> of the 8 most-recently-encountered regex, rather than recompiling every
> time; and then made 'autoconf' even faster when I taught it to do
> actions that didn't require regex use from 'm4' in the first place.)

Neat.

> The nice thing, though, is that I factored things so that the
> implementation of this one function can change without having to hunt
> down all call-sites, if I keep the contract the same.

I don't care for matching performance here, I do care for readability.
Please pick the solution you find easiest to understand.

>>>          discriminator_type = base_fields.get(discriminator)
>>>          if not discriminator_type:
>>>              raise QAPIExprError(expr_info,
>> 
>> What happens when I try 'discriminator': '**'?
>
> No clue.  Good thing for me to add somewhere in my series.  However, I

I had a second look.  I think the generator accepting '**' in exactly
the right places relies on:

(1) check_name() accepts only proper names, not '**'.

(2) All names get checked with check_name().

(3) Except check_type() accepts special type name '**' when allow_star.

(4) allow_star is set for exactly the right places.

With check_name()'s superfluous / incorrect '**' check gone, (1)
obviously holds.  (2) isn't obvious, thus food for code review.  (3) is
trivial.  (4) is trivial except for "exactly the right places".
qapi-code-gen.txt:

    In rare cases, QAPI cannot express a type-safe representation of a
    corresponding Client JSON Protocol command.  In these cases, if the
    command expression includes the key 'gen' with boolean value false,
    then the 'data' or 'returns' member that intends to bypass generated
    type-safety and do its own manual validation should use '**' rather
    than a valid type name.

Unfortunately, "the 'data' or 'returns' member that intends to bypass
[...] should use '**' rather than a valid type name" can be read in (at
least) two ways:

1. It applies to the 'data', 'returns' members of the command object.

2. It applies to members of 'data', 'returns' members of the command
object.

The schema uses both readings: qom-get has 'returns': '**', and qom-set,
netdev_add, object_add have 'data' members of the form KEY: '**'.

Note that neither code nor tests have 'data': '**' or KEY: '**' in
'returns'.

qapi.py appears to implement a third way: '**' may appear as type name
anywhere within 'data' or 'returns'.  May well make sense, and may well
work, but we have no test coverage for it.

We can extend tests to cover what the generator accepts (separate series
recommended), or restrict the generator to what we actually use and
test.  I'm leaning towards the latter.

Further note that '**' can only be used right in a command declaration.
Factoring out the right hand side of 'data' or 'returns' into a separate
struct type declaration isn't possible when it contains '**'.

> did manage to have this series at least think about a base type with
> '*switch':'Enum', then use 'discriminator':'*switch', which got past the
> generator (who knows what the C code would have done if have_switch was
> false?), so I plugged that hole; but in the process of testing it, I
> noticed that '*switch':'Enum' paired with 'discriminator':'switch'
> complained that 'switch' was not a member of the base class (which is a
> lie; it is present in the base class, but as an optional member).  Proof
> that the generator is a bunch of hacks strung together :)

Indeed!
Markus Armbruster March 29, 2015, 2:23 p.m. UTC | #7
Markus Armbruster <armbru@redhat.com> writes:

[...]
> I had a second look.  I think the generator accepting '**' in exactly
> the right places relies on:
>
> (1) check_name() accepts only proper names, not '**'.
>
> (2) All names get checked with check_name().
>
> (3) Except check_type() accepts special type name '**' when allow_star.
>
> (4) allow_star is set for exactly the right places.
>
> With check_name()'s superfluous / incorrect '**' check gone, (1)
> obviously holds.  (2) isn't obvious, thus food for code review.  (3) is
> trivial.  (4) is trivial except for "exactly the right places".
> qapi-code-gen.txt:
>
>     In rare cases, QAPI cannot express a type-safe representation of a
>     corresponding Client JSON Protocol command.  In these cases, if the
>     command expression includes the key 'gen' with boolean value false,
>     then the 'data' or 'returns' member that intends to bypass generated
>     type-safety and do its own manual validation should use '**' rather
>     than a valid type name.
>
> Unfortunately, "the 'data' or 'returns' member that intends to bypass
> [...] should use '**' rather than a valid type name" can be read in (at
> least) two ways:
>
> 1. It applies to the 'data', 'returns' members of the command object.
>
> 2. It applies to members of 'data', 'returns' members of the command
> object.
>
> The schema uses both readings: qom-get has 'returns': '**', and qom-set,
> netdev_add, object_add have 'data' members of the form KEY: '**'.
>
> Note that neither code nor tests have 'data': '**' or KEY: '**' in
> 'returns'.

Type '**' generally means "any JSON value".  However, a command's 'data'
must be a JSON object (see docs/qmp/qmp-spec.txt).  Ways to deal with
this:

A. Ignore.  Conforming to the schema is necessary but not sufficient for
   a command being accepted anyway.

B. The meaning of type '**' depends on context: it's "any JSON object"
   for a command's 'data', else "any JSON value".

C. Outlaw 'data': '**' for now.

I prefer C, I can accept A, I dislike B.

> qapi.py appears to implement a third way: '**' may appear as type name
> anywhere within 'data' or 'returns'.  May well make sense, and may well
> work, but we have no test coverage for it.
>
> We can extend tests to cover what the generator accepts (separate series
> recommended), or restrict the generator to what we actually use and
> test.  I'm leaning towards the latter.
>
> Further note that '**' can only be used right in a command declaration.
> Factoring out the right hand side of 'data' or 'returns' into a separate
> struct type declaration isn't possible when it contains '**'.

We could treat '**' as top type rather than as hack for a few commands.
Conceptually clean, but hard to justify without users.

[...]
diff mbox

Patch

diff --git a/scripts/qapi.py b/scripts/qapi.py
index c42683b..ed5385a 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -15,6 +15,7 @@  import re
 from ordereddict import OrderedDict
 import os
 import sys
+import string

 builtin_types = {
     'str':      'QTYPE_QSTRING',
@@ -276,8 +277,27 @@  def discriminator_find_enum_define(expr):

     return find_enum(discriminator_type)

+valid_characters = set(string.ascii_letters + string.digits + '.' + '-' + '_')
+def check_name(expr_info, source, name, allow_optional = False):
+    membername = name
+
+    if not isinstance(name, str):
+        raise QAPIExprError(expr_info,
+                            "%s requires a string name" % source)
+    if name == '**':
+        return
+    if name.startswith('*'):
+        membername = name[1:]
+        if not allow_optional:
+            raise QAPIExprError(expr_info,
+                                "%s does not allow optional name '%s'"
+                                % (source, name))
+    if not set(membername) <= valid_characters:
+        raise QAPIExprError(expr_info,
+                            "%s uses invalid name '%s'" % (source, name))
+
 def check_type(expr_info, source, data, allow_array = False,
-               allowed_metas = [], allow_dict = True):
+               allowed_metas = [], allow_dict = True, allow_optional = False):
     global all_names

     if data is None:
@@ -317,21 +337,23 @@  def check_type(expr_info, source, data, allow_array = False,
         raise QAPIExprError(expr_info,
                             "%s should be a type name" % source)
     for (key, value) in data.items():
+        check_name(expr_info, "Member of %s" % source, key,
+                   allow_optional=allow_optional)
         check_type(expr_info, "Member '%s' of %s" % (key, source), value,
                    allow_array=True,
                    allowed_metas=['built-in', 'union', 'alternate', 'struct',
                                   'enum'],
-                   allow_dict=True)
+                   allow_dict=True, allow_optional=True)

 def check_command(expr, expr_info):
     name = expr['command']
     check_type(expr_info, "'data' for command '%s'" % name,
                expr.get('data'),
-               allowed_metas=['union', 'struct'])
+               allowed_metas=['union', 'struct'], allow_optional=True)
     check_type(expr_info, "'returns' for command '%s'" % name,
                expr.get('returns'), allow_array=True,
                allowed_metas=['built-in', 'union', 'alternate', 'struct',
-                              'enum'])
+                              'enum'], allow_optional=True)

 def check_event(expr, expr_info):
     global events
@@ -345,7 +367,8 @@  def check_event(expr, expr_info):
                             % name)
     events.append(name)
     check_type(expr_info, "'data' for event '%s'" % name,
-               expr.get('data'), allowed_metas=['union', 'struct'])
+               expr.get('data'), allowed_metas=['union', 'struct'],
+               allow_optional=True)
     if params:
         for argname, argentry, optional, structured in parse_args(params):
             if structured:
@@ -385,12 +408,10 @@  def check_union(expr, expr_info):
                                 "Base '%s' is not a valid base type"
                                 % base)

-        # The value of member 'discriminator' must name a member of the
-        # base type.
-        if not isinstance(discriminator, str):
-            raise QAPIExprError(expr_info,
-                                "Flat union '%s' must have a string "
-                                "discriminator field" % name)
+        # The value of member 'discriminator' must name a non-optional
+        # member of the base type.
+        check_name(expr_info, "Discriminator of flat union '%s'" % name,
+                   discriminator)
         discriminator_type = base_fields.get(discriminator)
         if not discriminator_type:
             raise QAPIExprError(expr_info,
@@ -406,6 +427,8 @@  def check_union(expr, expr_info):

     # Check every branch
     for (key, value) in members.items():
+        check_name(expr_info, "Member of union '%s'" % name, key)
+
         # Each value must name a known type
         check_type(expr_info, "Member '%s' of union '%s'" % (key, name),
                    value, allow_array=True,
@@ -439,6 +462,8 @@  def check_alternate(expr, expr_info):

     # Check every branch
     for (key, value) in members.items():
+        check_name(expr_info, "Member of alternate '%s'" % name, key)
+
         # Check for conflicts in the generated enum
         c_key = _generate_enum_string(key)
         if c_key in values:
@@ -485,7 +510,8 @@  def check_struct(expr, expr_info):
     name = expr['type']
     members = expr['data']

-    check_type(expr_info, "'data' for type '%s'" % name, members)
+    check_type(expr_info, "'data' for type '%s'" % name, members,
+               allow_optional=True)
     check_type(expr_info, "'base' for type '%s'" % name, expr.get('base'),
                allowed_metas=['struct'], allow_dict=False)

@@ -676,8 +702,11 @@  def type_name(name):
         return c_list_type(name[0])
     return name

-def add_name(name, info, meta, implicit = False):
+def add_name(name, info, meta, implicit = False, source = None):
     global all_names
+    if not source:
+        source = "'%s'" % meta
+    check_name(info, source, name)
     if name in all_names:
         raise QAPIExprError(info,
                             "%s '%s' is already defined"
@@ -691,7 +720,7 @@  def add_name(name, info, meta, implicit = False):
 def add_struct(definition, info):
     global struct_types
     name = definition['type']
-    add_name(name, info, 'struct')
+    add_name(name, info, 'struct', source="'type'")
     struct_types.append(definition)

 def find_struct(name):
diff --git a/tests/qapi-schema/bad-ident.err b/tests/qapi-schema/bad-ident.err
index e69de29..42b490c 100644
--- a/tests/qapi-schema/bad-ident.err
+++ b/tests/qapi-schema/bad-ident.err
@@ -0,0 +1 @@ 
+tests/qapi-schema/bad-ident.json:2: 'type' does not allow optional name '*oops'
diff --git a/tests/qapi-schema/bad-ident.exit b/tests/qapi-schema/bad-ident.exit
index 573541a..d00491f 100644
--- a/tests/qapi-schema/bad-ident.exit
+++ b/tests/qapi-schema/bad-ident.exit
@@ -1 +1 @@ 
-0
+1
diff --git a/tests/qapi-schema/bad-ident.json b/tests/qapi-schema/bad-ident.json
index 66333a7..7418364 100644
--- a/tests/qapi-schema/bad-ident.json
+++ b/tests/qapi-schema/bad-ident.json
@@ -1,3 +1,3 @@ 
-# FIXME: we should reject creating a type name with bad name
+# we reject creating a type name with bad name
 { 'type': '*oops', 'data': { 'i': 'int' } }

diff --git a/tests/qapi-schema/bad-ident.out b/tests/qapi-schema/bad-ident.out
index 165e346..e69de29 100644
--- a/tests/qapi-schema/bad-ident.out
+++ b/tests/qapi-schema/bad-ident.out
@@ -1,3 +0,0 @@ 
-[OrderedDict([('type', '*oops'), ('data', OrderedDict([('i', 'int')]))])]
-[]
-[OrderedDict([('type', '*oops'), ('data', OrderedDict([('i', 'int')]))])]
diff --git a/tests/qapi-schema/flat-union-bad-discriminator.err b/tests/qapi-schema/flat-union-bad-discriminator.err
index 4f0c798..c38cc8e 100644
--- a/tests/qapi-schema/flat-union-bad-discriminator.err
+++ b/tests/qapi-schema/flat-union-bad-discriminator.err
@@ -1 +1 @@ 
-tests/qapi-schema/flat-union-bad-discriminator.json:11: Flat union 'TestUnion' must have a string discriminator field
+tests/qapi-schema/flat-union-bad-discriminator.json:11: Discriminator of flat union 'TestUnion' requires a string name
diff --git a/tests/qapi-schema/flat-union-optional-discriminator.err b/tests/qapi-schema/flat-union-optional-discriminator.err
index e69de29..df290ce 100644
--- a/tests/qapi-schema/flat-union-optional-discriminator.err
+++ b/tests/qapi-schema/flat-union-optional-discriminator.err
@@ -0,0 +1 @@ 
+tests/qapi-schema/flat-union-optional-discriminator.json:5: Discriminator of flat union 'MyUnion' does not allow optional name '*switch'
diff --git a/tests/qapi-schema/flat-union-optional-discriminator.exit b/tests/qapi-schema/flat-union-optional-discriminator.exit
index 573541a..d00491f 100644
--- a/tests/qapi-schema/flat-union-optional-discriminator.exit
+++ b/tests/qapi-schema/flat-union-optional-discriminator.exit
@@ -1 +1 @@ 
-0
+1
diff --git a/tests/qapi-schema/flat-union-optional-discriminator.json b/tests/qapi-schema/flat-union-optional-discriminator.json
index 4957c72..cbf0afa 100644
--- a/tests/qapi-schema/flat-union-optional-discriminator.json
+++ b/tests/qapi-schema/flat-union-optional-discriminator.json
@@ -1,4 +1,4 @@ 
-# FIXME: we should require the discriminator to be non-optional
+# we require the discriminator to be non-optional
 { 'enum': 'Enum', 'data': [ 'one', 'two' ] }
 { 'type': 'Base',
   'data': { '*switch': 'Enum' } }
diff --git a/tests/qapi-schema/flat-union-optional-discriminator.out b/tests/qapi-schema/flat-union-optional-discriminator.out
index f4b6bed..e69de29 100644
--- a/tests/qapi-schema/flat-union-optional-discriminator.out
+++ b/tests/qapi-schema/flat-union-optional-discriminator.out
@@ -1,5 +0,0 @@ 
-[OrderedDict([('enum', 'Enum'), ('data', ['one', 'two'])]),
- OrderedDict([('type', 'Base'), ('data', OrderedDict([('*switch', 'Enum')]))]),
- OrderedDict([('union', 'MyUnion'), ('base', 'Base'), ('discriminator', '*switch'), ('data', OrderedDict([('one', 'int'), ('two', 'str')]))])]
-[{'enum_name': 'Enum', 'enum_values': ['one', 'two']}]
-[OrderedDict([('type', 'Base'), ('data', OrderedDict([('*switch', 'Enum')]))])]
diff --git a/tests/qapi-schema/union-optional-branch.err b/tests/qapi-schema/union-optional-branch.err
index e69de29..3ada133 100644
--- a/tests/qapi-schema/union-optional-branch.err
+++ b/tests/qapi-schema/union-optional-branch.err
@@ -0,0 +1 @@ 
+tests/qapi-schema/union-optional-branch.json:2: Member of union 'Union' does not allow optional name '*a'
diff --git a/tests/qapi-schema/union-optional-branch.exit b/tests/qapi-schema/union-optional-branch.exit
index 573541a..d00491f 100644
--- a/tests/qapi-schema/union-optional-branch.exit
+++ b/tests/qapi-schema/union-optional-branch.exit
@@ -1 +1 @@ 
-0
+1
diff --git a/tests/qapi-schema/union-optional-branch.json b/tests/qapi-schema/union-optional-branch.json
index c513db7..591615f 100644
--- a/tests/qapi-schema/union-optional-branch.json
+++ b/tests/qapi-schema/union-optional-branch.json
@@ -1,2 +1,2 @@ 
-# FIXME: union branches cannot be optional
+# union branches cannot be optional
 { 'union': 'Union', 'data': { '*a': 'int', 'b': 'str' } }
diff --git a/tests/qapi-schema/union-optional-branch.out b/tests/qapi-schema/union-optional-branch.out
index b03b5d2..e69de29 100644
--- a/tests/qapi-schema/union-optional-branch.out
+++ b/tests/qapi-schema/union-optional-branch.out
@@ -1,3 +0,0 @@ 
-[OrderedDict([('union', 'Union'), ('data', OrderedDict([('*a', 'int'), ('b', 'str')]))])]
-[{'enum_name': 'UnionKind', 'enum_values': None}]
-[]