diff mbox

[v6,37/36] qapi: Support (subset of) \u escapes in strings

Message ID 1428697701-31743-1-git-send-email-eblake@redhat.com
State New
Headers show

Commit Message

Eric Blake April 10, 2015, 8:28 p.m. UTC
The handling of \ inside QAPI strings was less than ideal, and
really only worked for JSON's \/, \\, \", and our extension of \'
(an obvious extension, when you realize we use '' instead of ""
for strings).  For other things, like '\n', it resulted in a
literal 'n' instead of a newline.

Of course, at the moment, we really have no use for escaped
characters, as QAPI has to map to C identifiers, and we currently
support ASCII only for that.  But down the road, we may add
support for default values for string parameters to a command
or struct; if that happens, it would be nice to correctly support
all JSON escape sequences, such as \n or \uXXXX.  This gets us
closer, by supporting Unicode escapes in the ASCII range.

Since JSON does not require \OCTAL or \xXX escapes, I did not
add them here, but they would be an easy addition if we desired them.

Signed-off-by: Eric Blake <eblake@redhat.com>
---

v6: new patch

---
 scripts/qapi.py                          | 33 +++++++++++++++++++++++++++++++-
 tests/Makefile                           |  1 +
 tests/qapi-schema/escape-too-big.err     |  1 +
 tests/qapi-schema/escape-too-big.exit    |  1 +
 tests/qapi-schema/escape-too-big.json    |  3 +++
 tests/qapi-schema/escape-too-big.out     |  0
 tests/qapi-schema/escape-too-short.err   |  1 +
 tests/qapi-schema/escape-too-short.exit  |  1 +
 tests/qapi-schema/escape-too-short.json  |  3 +++
 tests/qapi-schema/escape-too-short.out   |  0
 tests/qapi-schema/ident-with-escape.err  |  1 -
 tests/qapi-schema/ident-with-escape.exit |  2 +-
 tests/qapi-schema/ident-with-escape.json |  2 +-
 tests/qapi-schema/ident-with-escape.out  |  3 +++
 tests/qapi-schema/unicode-str.err        |  1 +
 tests/qapi-schema/unicode-str.exit       |  1 +
 tests/qapi-schema/unicode-str.json       |  2 ++
 tests/qapi-schema/unicode-str.out        |  0
 18 files changed, 52 insertions(+), 4 deletions(-)
 create mode 100644 tests/qapi-schema/escape-too-big.err
 create mode 100644 tests/qapi-schema/escape-too-big.exit
 create mode 100644 tests/qapi-schema/escape-too-big.json
 create mode 100644 tests/qapi-schema/escape-too-big.out
 create mode 100644 tests/qapi-schema/escape-too-short.err
 create mode 100644 tests/qapi-schema/escape-too-short.exit
 create mode 100644 tests/qapi-schema/escape-too-short.json
 create mode 100644 tests/qapi-schema/escape-too-short.out
 create mode 100644 tests/qapi-schema/unicode-str.err
 create mode 100644 tests/qapi-schema/unicode-str.exit
 create mode 100644 tests/qapi-schema/unicode-str.json
 create mode 100644 tests/qapi-schema/unicode-str.out

diff --git a/tests/qapi-schema/unicode-str.out b/tests/qapi-schema/unicode-str.out
new file mode 100644
index 0000000..e69de29

Comments

Markus Armbruster April 28, 2015, 1:23 p.m. UTC | #1
Eric Blake <eblake@redhat.com> writes:

> The handling of \ inside QAPI strings was less than ideal, and
> really only worked JSON's \/, \\, \", and our extension of \'
> (an obvious extension, when you realize we use '' instead of ""
> for strings).  For other things, like '\n', it resulted in a
> literal 'n' instead of a newline.
>
> Of course, at the moment, we really have no use for escaped
> characters, as QAPI has to map to C identifiers, and we currently
> support ASCII only for that.  But down the road, we may add
> support for default values for string parameters to a command
> or struct; if that happens, it would be nice to correctly support
> all JSON escape sequences, such as \n or \uXXXX.  This gets us
> closer, by supporting Unicode escapes in the ASCII range.
>
> Since JSON does not require \OCTAL or \xXX escapes, I did not
> add it here, but it would be an easy addition if we desired it.
>
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>
> v6: new patch
>
> ---
>  scripts/qapi.py                          | 33 +++++++++++++++++++++++++++++++-
>  tests/Makefile                           |  1 +
>  tests/qapi-schema/escape-too-big.err     |  1 +
>  tests/qapi-schema/escape-too-big.exit    |  1 +
>  tests/qapi-schema/escape-too-big.json    |  3 +++
>  tests/qapi-schema/escape-too-big.out     |  0
>  tests/qapi-schema/escape-too-short.err   |  1 +
>  tests/qapi-schema/escape-too-short.exit  |  1 +
>  tests/qapi-schema/escape-too-short.json  |  3 +++
>  tests/qapi-schema/escape-too-short.out   |  0
>  tests/qapi-schema/ident-with-escape.err  |  1 -
>  tests/qapi-schema/ident-with-escape.exit |  2 +-
>  tests/qapi-schema/ident-with-escape.json |  2 +-
>  tests/qapi-schema/ident-with-escape.out  |  3 +++
>  tests/qapi-schema/unicode-str.err        |  1 +
>  tests/qapi-schema/unicode-str.exit       |  1 +
>  tests/qapi-schema/unicode-str.json       |  2 ++
>  tests/qapi-schema/unicode-str.out        |  0
>  18 files changed, 52 insertions(+), 4 deletions(-)
>  create mode 100644 tests/qapi-schema/escape-too-big.err
>  create mode 100644 tests/qapi-schema/escape-too-big.exit
>  create mode 100644 tests/qapi-schema/escape-too-big.json
>  create mode 100644 tests/qapi-schema/escape-too-big.out
>  create mode 100644 tests/qapi-schema/escape-too-short.err
>  create mode 100644 tests/qapi-schema/escape-too-short.exit
>  create mode 100644 tests/qapi-schema/escape-too-short.json
>  create mode 100644 tests/qapi-schema/escape-too-short.out
>  create mode 100644 tests/qapi-schema/unicode-str.err
>  create mode 100644 tests/qapi-schema/unicode-str.exit
>  create mode 100644 tests/qapi-schema/unicode-str.json
>  create mode 100644 tests/qapi-schema/unicode-str.out
>
> diff --git a/scripts/qapi.py b/scripts/qapi.py
> index 60ed34a..853f9a3 100644
> --- a/scripts/qapi.py
> +++ b/scripts/qapi.py
> @@ -173,7 +173,38 @@ class QAPISchema:
>                          raise QAPISchemaError(self,
>                                                'Missing terminating "\'"')
>                      if esc:
> -                        string += ch
> +                        if ch == 'b':
> +                            string += '\b'
> +                        elif ch == 'f':
> +                            string += '\f'
> +                        elif ch == 'n':
> +                            string += '\n'
> +                        elif ch == 'r':
> +                            string += '\r'
> +                        elif ch == 't':
> +                            string += '\t'
> +                        elif ch == 'u':
> +                            value = 0
> +                            for x in range(0, 4):
> +                                ch = self.src[self.cursor]
> +                                self.cursor += 1
> +                                if ch not in "0123456789abcdefABCDEF":
> +                                    raise QAPISchemaError(self,
> +                                                          '\\u escape needs 4 '
> +                                                          'hex digits')
> +                                value = (value << 4) + int(ch, 16)
> +                            # If Python 2 and 3 didn't disagree so much on
> +                            # how to handle Unicode, then we could allow
> +                            # Unicode string defaults.  But most of QAPI is
> +                            # ASCII-only, so we aren't losing much for now.
> +                            if value > 0x7f:
> +                                raise QAPISchemaError(self,
> +                                                      'For now, \\u escape '
> +                                                      'only supports values '
> +                                                      'up to \\u007f')
> +                            string += chr(value)
> +                        else:
> +                            string += ch
>                          esc = False
>                      elif ch == "\\":
>                          esc = True

RFC 7159 accepts escapes ["\/bfnrtu], where u is followed by four
hexadecimal digits.

Our C JSON parser additionally accepts ', see json-lexer.c.

This code accepts any character after the backslash.  I'd prefer to make
it consistent with our C JSON parser instead.

[...]
diff mbox

Patch

diff --git a/scripts/qapi.py b/scripts/qapi.py
index 60ed34a..853f9a3 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -173,7 +173,38 @@  class QAPISchema:
                         raise QAPISchemaError(self,
                                               'Missing terminating "\'"')
                     if esc:
-                        string += ch
+                        if ch == 'b':
+                            string += '\b'
+                        elif ch == 'f':
+                            string += '\f'
+                        elif ch == 'n':
+                            string += '\n'
+                        elif ch == 'r':
+                            string += '\r'
+                        elif ch == 't':
+                            string += '\t'
+                        elif ch == 'u':
+                            value = 0
+                            for x in range(0, 4):
+                                ch = self.src[self.cursor]
+                                self.cursor += 1
+                                if ch not in "0123456789abcdefABCDEF":
+                                    raise QAPISchemaError(self,
+                                                          '\\u escape needs 4 '
+                                                          'hex digits')
+                                value = (value << 4) + int(ch, 16)
+                            # If Python 2 and 3 didn't disagree so much on
+                            # how to handle Unicode, then we could allow
+                            # Unicode string defaults.  But most of QAPI is
+                            # ASCII-only, so we aren't losing much for now.
+                            if value > 0x7f:
+                                raise QAPISchemaError(self,
+                                                      'For now, \\u escape '
+                                                      'only supports values '
+                                                      'up to \\u007f')
+                            string += chr(value)
+                        else:
+                            string += ch
                         esc = False
                     elif ch == "\\":
                         esc = True
diff --git a/tests/Makefile b/tests/Makefile
index f37cd01..0cd114f 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -212,6 +212,7 @@  check-qapi-schema-y := $(addprefix tests/qapi-schema/, \
 	enum-clash-member.json enum-max-member.json enum-union-clash.json \
 	enum-bad-name.json funny-char.json indented-expr.json \
 	missing-type.json bad-ident.json ident-with-escape.json \
+	escape-too-short.json escape-too-big.json unicode-str.json \
 	double-type.json bad-base.json bad-type-bool.json bad-type-int.json \
 	bad-type-dict.json double-data.json unknown-expr-key.json \
 	redefined-type.json redefined-command.json redefined-builtin.json \
diff --git a/tests/qapi-schema/escape-too-big.err b/tests/qapi-schema/escape-too-big.err
new file mode 100644
index 0000000..7f3976e
--- /dev/null
+++ b/tests/qapi-schema/escape-too-big.err
@@ -0,0 +1 @@ 
+tests/qapi-schema/escape-too-big.json:3:14: For now, \u escape only supports values up to \u007f
diff --git a/tests/qapi-schema/escape-too-big.exit b/tests/qapi-schema/escape-too-big.exit
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/tests/qapi-schema/escape-too-big.exit
@@ -0,0 +1 @@ 
+1
diff --git a/tests/qapi-schema/escape-too-big.json b/tests/qapi-schema/escape-too-big.json
new file mode 100644
index 0000000..62bcecd
--- /dev/null
+++ b/tests/qapi-schema/escape-too-big.json
@@ -0,0 +1,3 @@ 
+# we don't support full Unicode strings, yet
+# { 'command': 'é' }
+{ 'command': '\u00e9' }
diff --git a/tests/qapi-schema/escape-too-big.out b/tests/qapi-schema/escape-too-big.out
new file mode 100644
index 0000000..e69de29
diff --git a/tests/qapi-schema/escape-too-short.err b/tests/qapi-schema/escape-too-short.err
new file mode 100644
index 0000000..934de59
--- /dev/null
+++ b/tests/qapi-schema/escape-too-short.err
@@ -0,0 +1 @@ 
+tests/qapi-schema/escape-too-short.json:3:14: \u escape needs 4 hex digits
diff --git a/tests/qapi-schema/escape-too-short.exit b/tests/qapi-schema/escape-too-short.exit
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/tests/qapi-schema/escape-too-short.exit
@@ -0,0 +1 @@ 
+1
diff --git a/tests/qapi-schema/escape-too-short.json b/tests/qapi-schema/escape-too-short.json
new file mode 100644
index 0000000..6cb1dec
--- /dev/null
+++ b/tests/qapi-schema/escape-too-short.json
@@ -0,0 +1,3 @@ 
+# the \u escape requires 4 hex digits
+# { 'command': 'a' }
+{ 'command': '\u61' }
diff --git a/tests/qapi-schema/escape-too-short.out b/tests/qapi-schema/escape-too-short.out
new file mode 100644
index 0000000..e69de29
diff --git a/tests/qapi-schema/ident-with-escape.err b/tests/qapi-schema/ident-with-escape.err
index f7d1c55..e69de29 100644
--- a/tests/qapi-schema/ident-with-escape.err
+++ b/tests/qapi-schema/ident-with-escape.err
@@ -1 +0,0 @@ 
-tests/qapi-schema/ident-with-escape.json:3: Expression is missing metatype
diff --git a/tests/qapi-schema/ident-with-escape.exit b/tests/qapi-schema/ident-with-escape.exit
index d00491f..573541a 100644
--- a/tests/qapi-schema/ident-with-escape.exit
+++ b/tests/qapi-schema/ident-with-escape.exit
@@ -1 +1 @@ 
-1
+0
diff --git a/tests/qapi-schema/ident-with-escape.json b/tests/qapi-schema/ident-with-escape.json
index cfb2050..5661750 100644
--- a/tests/qapi-schema/ident-with-escape.json
+++ b/tests/qapi-schema/ident-with-escape.json
@@ -1,4 +1,4 @@ 
-# FIXME: we should allow escape sequences in strings, if they map back to ASCII
+# we allow escape sequences in strings, if they map back to ASCII
 # { 'command': 'fooA', 'data': { 'bar1': 'str' } }
 { 'c\u006fmmand': '\u0066\u006f\u006FA',
   'd\u0061ta': { '\u0062\u0061\u00721': '\u0073\u0074\u0072' } }
diff --git a/tests/qapi-schema/ident-with-escape.out b/tests/qapi-schema/ident-with-escape.out
index e69de29..4028430 100644
--- a/tests/qapi-schema/ident-with-escape.out
+++ b/tests/qapi-schema/ident-with-escape.out
@@ -0,0 +1,3 @@ 
+[OrderedDict([('command', 'fooA'), ('data', OrderedDict([('bar1', 'str')]))])]
+[]
+[]
diff --git a/tests/qapi-schema/unicode-str.err b/tests/qapi-schema/unicode-str.err
new file mode 100644
index 0000000..f621cd6
--- /dev/null
+++ b/tests/qapi-schema/unicode-str.err
@@ -0,0 +1 @@ 
+tests/qapi-schema/unicode-str.json:2: 'command' uses invalid name 'é'
diff --git a/tests/qapi-schema/unicode-str.exit b/tests/qapi-schema/unicode-str.exit
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/tests/qapi-schema/unicode-str.exit
@@ -0,0 +1 @@ 
+1
diff --git a/tests/qapi-schema/unicode-str.json b/tests/qapi-schema/unicode-str.json
new file mode 100644
index 0000000..5253a1b
--- /dev/null
+++ b/tests/qapi-schema/unicode-str.json
@@ -0,0 +1,2 @@ 
+# we don't support full Unicode strings, yet
+{ 'command': 'é' }