diff mbox series

[RFC,v2,4/8] qapi: golang: Generate qapi's union types in Go

Message ID 20220617121932.249381-5-victortoso@redhat.com
State New
Headers show
Series qapi: add generator for Golang interface | expand

Commit Message

Victor Toso June 17, 2022, 12:19 p.m. UTC
This patch handles QAPI union types and generates the equivalent data
structures and methods in Go to handle it.

At the moment of this writing, it generates 38 structures.

The QAPI union type has two types of fields: The @base and the
@variants members. The @base fields can be considered common members
for the union while only one field maximum is set for the @variants.

In the QAPI specification, it defines a @discriminator field, which is
an Enum type. The purpose of the  @discriminator is to identify which
@variant type is being used. The @discriminator is not used in the
generated union Go structs as it suffices to check which one of the
@variants fields were set.

The union types implement the Marshaler and Unmarshaler interfaces to
seamless decode from JSON objects to Golang structs and vice versa.

qapi:
  | { 'union': 'SetPasswordOptions',
  |   'base': { 'protocol': 'DisplayProtocol',
  |             'password': 'str',
  |             '*connected': 'SetPasswordAction' },
  |   'discriminator': 'protocol',
  |   'data': { 'vnc': 'SetPasswordOptionsVnc' } }

go:
  | type SetPasswordOptions struct {
  | 	// Base fields
  | 	Password  string             `json:"password"`
  | 	Connected *SetPasswordAction `json:"connected,omitempty"`
  |
  | 	// Variants fields
  | 	Vnc *SetPasswordOptionsVnc `json:"-"`
  | }

Signed-off-by: Victor Toso <victortoso@redhat.com>
---
 scripts/qapi/golang.py | 112 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 105 insertions(+), 7 deletions(-)

Comments

Andrea Bolognani July 5, 2022, 3:45 p.m. UTC | #1
On Fri, Jun 17, 2022 at 02:19:28PM +0200, Victor Toso wrote:
> qapi:
>   | { 'union': 'SetPasswordOptions',
>   |   'base': { 'protocol': 'DisplayProtocol',
>   |             'password': 'str',
>   |             '*connected': 'SetPasswordAction' },
>   |   'discriminator': 'protocol',
>   |   'data': { 'vnc': 'SetPasswordOptionsVnc' } }
>
> go:
>   | type SetPasswordOptions struct {
>   | 	// Base fields
>   | 	Password  string             `json:"password"`
>   | 	Connected *SetPasswordAction `json:"connected,omitempty"`
>   |
>   | 	// Variants fields
>   | 	Vnc *SetPasswordOptionsVnc `json:"-"`
>   | }

Generated code:

> type GuestPanicInformation struct {
>     // Variants fields
>     HyperV *GuestPanicInformationHyperV `json:"-"`
>     S390   *GuestPanicInformationS390   `json:"-"`
> }
>
> func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
>     type Alias GuestPanicInformation
>     alias := Alias(s)
>     base, err := json.Marshal(alias)
>     if err != nil {
>         return nil, err
>     }
>
>     driver := ""
>     payload := []byte{}
>     if s.HyperV != nil {
>         driver = "hyper-v"
>         payload, err = json.Marshal(s.HyperV)
>     } else if s.S390 != nil {
>         driver = "s390"
>         payload, err = json.Marshal(s.S390)
>     }
>
>     if err != nil {
>         return nil, err
>     }
>
>     if len(base) == len("{}") {
>         base = []byte("")
>     } else {
>         base = append([]byte{','}, base[1:len(base)-1]...)
>     }
>
>     if len(payload) == 0 || len(payload) == len("{}") {
>         payload = []byte("")
>     } else {
>         payload = append([]byte{','}, payload[1:len(payload)-1]...)
>     }
>
>     result := fmt.Sprintf(`{"type":"%s"%s%s}`, driver, base, payload)
>     return []byte(result), nil

All this string manipulation looks sketchy. Is there some reason that
I'm not seeing preventing you for doing something like the untested
code below?

  func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
      if s.HyperV != nil {
          type union struct {
              Discriminator string                      `json:"type"`
              HyperV        GuestPanicInformationHyperV `json:"hyper-v"`
          }
          tmp := union {
              Discriminator: "hyper-v",
              HyperV:        s.HyperV,
          }
          return json.Marshal(tmp)
      } else if s.S390 != nil {
          type union struct {
              Discriminator string                      `json:"type"`
              S390          GuestPanicInformationHyperV `json:"s390"`
          }
          tmp := union {
              Discriminator: "s390",
              S390:          s.S390,
          }
          return json.Marshal(tmp)
      }
      return nil, errors.New("...")
  }

> func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
>     type Alias GuestPanicInformation
>     peek := struct {
>         Alias
>         Driver string `json:"type"`
>     }{}
>
>     if err := json.Unmarshal(data, &peek); err != nil {
>         return err
>     }
>     *s = GuestPanicInformation(peek.Alias)
>
>     switch peek.Driver {
>
>     case "hyper-v":
>         s.HyperV = new(GuestPanicInformationHyperV)
>         if err := json.Unmarshal(data, s.HyperV); err != nil {
>             s.HyperV = nil
>             return err
>         }
>     case "s390":
>         s.S390 = new(GuestPanicInformationS390)
>         if err := json.Unmarshal(data, s.S390); err != nil {
>             s.S390 = nil
>             return err
>         }
>     }
>     // Unrecognizer drivers are silently ignored.
>     return nil

This looks pretty reasonable, but since you're only using "peek" to
look at the discriminator you should be able to leave out the Alias
type entirely and perform the initial Unmarshal operation while
ignoring all other fields.

The comments made elsewhere about potentially being more strict and
rejecting invalid JSON also apply here.
Daniel P. Berrangé July 5, 2022, 4:35 p.m. UTC | #2
On Tue, Jul 05, 2022 at 08:45:30AM -0700, Andrea Bolognani wrote:
> On Fri, Jun 17, 2022 at 02:19:28PM +0200, Victor Toso wrote:
> > qapi:
> >   | { 'union': 'SetPasswordOptions',
> >   |   'base': { 'protocol': 'DisplayProtocol',
> >   |             'password': 'str',
> >   |             '*connected': 'SetPasswordAction' },
> >   |   'discriminator': 'protocol',
> >   |   'data': { 'vnc': 'SetPasswordOptionsVnc' } }
> >
> > go:
> >   | type SetPasswordOptions struct {
> >   | 	// Base fields
> >   | 	Password  string             `json:"password"`
> >   | 	Connected *SetPasswordAction `json:"connected,omitempty"`
> >   |
> >   | 	// Variants fields
> >   | 	Vnc *SetPasswordOptionsVnc `json:"-"`
> >   | }
> 
> Generated code:
> 
> > type GuestPanicInformation struct {
> >     // Variants fields
> >     HyperV *GuestPanicInformationHyperV `json:"-"`
> >     S390   *GuestPanicInformationS390   `json:"-"`
> > }
> >
> > func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
> >     type Alias GuestPanicInformation
> >     alias := Alias(s)
> >     base, err := json.Marshal(alias)
> >     if err != nil {
> >         return nil, err
> >     }
> >
> >     driver := ""
> >     payload := []byte{}
> >     if s.HyperV != nil {
> >         driver = "hyper-v"
> >         payload, err = json.Marshal(s.HyperV)
> >     } else if s.S390 != nil {
> >         driver = "s390"
> >         payload, err = json.Marshal(s.S390)
> >     }
> >
> >     if err != nil {
> >         return nil, err
> >     }
> >
> >     if len(base) == len("{}") {
> >         base = []byte("")
> >     } else {
> >         base = append([]byte{','}, base[1:len(base)-1]...)
> >     }
> >
> >     if len(payload) == 0 || len(payload) == len("{}") {
> >         payload = []byte("")
> >     } else {
> >         payload = append([]byte{','}, payload[1:len(payload)-1]...)
> >     }
> >
> >     result := fmt.Sprintf(`{"type":"%s"%s%s}`, driver, base, payload)
> >     return []byte(result), nil
> 
> All this string manipulation looks sketchy. Is there some reason that
> I'm not seeing preventing you for doing something like the untested
> code below?
> 
>   func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
>       if s.HyperV != nil {
>           type union struct {
>               Discriminator string                      `json:"type"`
>               HyperV        GuestPanicInformationHyperV `json:"hyper-v"`
>           }
>           tmp := union {
>               Discriminator: "hyper-v",
>               HyperV:        s.HyperV,
>           }
>           return json.Marshal(tmp)
>       } else if s.S390 != nil {
>           type union struct {
>               Discriminator string                      `json:"type"`
>               S390          GuestPanicInformationHyperV `json:"s390"`
>           }
>           tmp := union {
>               Discriminator: "s390",
>               S390:          s.S390,
>           }
>           return json.Marshal(tmp)
>       }
>       return nil, errors.New("...")
>   }

Using these dummy structs is the way I've approached the
discriminated union issue in the libvirt Golang XML bindings
and it works well. It is the bit I like the least, but it was
the lesser of many evils, and on the plus side in the QEMU case
it'll be auto-generated code.

> 
> > func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
> >     type Alias GuestPanicInformation
> >     peek := struct {
> >         Alias
> >         Driver string `json:"type"`
> >     }{}
> >
> >     if err := json.Unmarshal(data, &peek); err != nil {
> >         return err
> >     }
> >     *s = GuestPanicInformation(peek.Alias)
> >
> >     switch peek.Driver {
> >
> >     case "hyper-v":
> >         s.HyperV = new(GuestPanicInformationHyperV)
> >         if err := json.Unmarshal(data, s.HyperV); err != nil {
> >             s.HyperV = nil
> >             return err
> >         }
> >     case "s390":
> >         s.S390 = new(GuestPanicInformationS390)
> >         if err := json.Unmarshal(data, s.S390); err != nil {
> >             s.S390 = nil
> >             return err
> >         }
> >     }
> >     // Unrecognizer drivers are silently ignored.
> >     return nil
> 
> This looks pretty reasonable, but since you're only using "peek" to
> look at the discriminator you should be able to leave out the Alias
> type entirely and perform the initial Unmarshal operation while
> ignoring all other fields.

Once you've defined the dummy structs for the Marshall case
though, you might as well use them for Unmarshall too, so you're
not parsing the JSON twice.

With regards,
Daniel
Andrea Bolognani July 6, 2022, 9:28 a.m. UTC | #3
On Tue, Jul 05, 2022 at 05:35:26PM +0100, Daniel P. Berrangé wrote:
> On Tue, Jul 05, 2022 at 08:45:30AM -0700, Andrea Bolognani wrote:
> > All this string manipulation looks sketchy. Is there some reason that
> > I'm not seeing preventing you for doing something like the untested
> > code below?
> >
> >   func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
> >       if s.HyperV != nil {
> >           type union struct {
> >               Discriminator string                      `json:"type"`
> >               HyperV        GuestPanicInformationHyperV `json:"hyper-v"`
> >           }
> >           tmp := union {
> >               Discriminator: "hyper-v",
> >               HyperV:        s.HyperV,
> >           }
> >           return json.Marshal(tmp)
> >       } else if s.S390 != nil {
> >           type union struct {
> >               Discriminator string                      `json:"type"`
> >               S390          GuestPanicInformationHyperV `json:"s390"`
> >           }
> >           tmp := union {
> >               Discriminator: "s390",
> >               S390:          s.S390,
> >           }
> >           return json.Marshal(tmp)
> >       }
> >       return nil, errors.New("...")
> >   }
>
> Using these dummy structs is the way I've approached the
> discriminated union issue in the libvirt Golang XML bindings
> and it works well. It is the bit I like the least, but it was
> the lesser of many evils, and on the plus side in the QEMU case
> it'll be auto-generated code.

It appears to be the standard way to approach the problem in Go. It
sort of comes naturally given how the APIs for marshal/unmarshal have
been defined.

> > > func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
> > >     type Alias GuestPanicInformation
> > >     peek := struct {
> > >         Alias
> > >         Driver string `json:"type"`
> > >     }{}
> > >
> > >     if err := json.Unmarshal(data, &peek); err != nil {
> > >         return err
> > >     }
> > >     *s = GuestPanicInformation(peek.Alias)
> > >
> > >     switch peek.Driver {
> > >
> > >     case "hyper-v":
> > >         s.HyperV = new(GuestPanicInformationHyperV)
> > >         if err := json.Unmarshal(data, s.HyperV); err != nil {
> > >             s.HyperV = nil
> > >             return err
> > >         }
> > >     case "s390":
> > >         s.S390 = new(GuestPanicInformationS390)
> > >         if err := json.Unmarshal(data, s.S390); err != nil {
> > >             s.S390 = nil
> > >             return err
> > >         }
> > >     }
> > >     // Unrecognizer drivers are silently ignored.
> > >     return nil
> >
> > This looks pretty reasonable, but since you're only using "peek" to
> > look at the discriminator you should be able to leave out the Alias
> > type entirely and perform the initial Unmarshal operation while
> > ignoring all other fields.
>
> Once you've defined the dummy structs for the Marshall case
> though, you might as well use them for Unmarshall too, so you're
> not parsing the JSON twice.

You're right, that is undesirable. What about something like this?

  type GuestPanicInformation struct {
      HyperV *GuestPanicInformationHyperV
      S390   *GuestPanicInformationS390
  }

  type jsonGuestPanicInformation struct {
      Discriminator string                       `json:"type"`
      HyperV        *GuestPanicInformationHyperV `json:"hyper-v"`
      S390          *GuestPanicInformationS390   `json:"s390"`
  }

  func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
      if (s.HyperV != nil && s.S390 != nil) ||
          (s.HyperV == nil && s.S390 == nil) {
          // client hasn't filled in the struct properly
          return nil, errors.New("...")
      }

      tmp := jsonGuestPanicInformation{}

      if s.HyperV != nil {
          tmp.Discriminator = "hyper-v"
          tmp.HyperV = s.HyperV
      } else if s.S390 != nil {
          tmp.Discriminator = "s390"
          tmp.S390 = s.S390
      }

      return json.Marshal(tmp)
  }

  func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
      tmp := jsonGuestPanicInformation{}

      err := json.Unmarshal(data, &tmp)
      if err != nil {
          return err
      }

      switch tmp.Discriminator {
      case "hyper-v":
          if tmp.HyperV == nil {
              return errors.New("...")
          }
          s.HyperV = tmp.HyperV
      case "s390":
          if tmp.S390 == nil {
              return errors.New("...")
          }
          s.S390 = tmp.S390
      }
      // if we hit none of the cases above, that means the
      // server has produced a variant we don't know about

      return nil
  }

This avoid parsing the JSON twice as well as having to define
multiple dummy structs, which keeps the code shorter and more
readable.

I've also thrown in some additional error checking for good measure,
ensuring that we abort when the input is completely nonsensical from
a semantical standpoint.
Daniel P. Berrangé July 6, 2022, 9:37 a.m. UTC | #4
On Wed, Jul 06, 2022 at 04:28:16AM -0500, Andrea Bolognani wrote:
> On Tue, Jul 05, 2022 at 05:35:26PM +0100, Daniel P. Berrangé wrote:
> > On Tue, Jul 05, 2022 at 08:45:30AM -0700, Andrea Bolognani wrote:
> > > All this string manipulation looks sketchy. Is there some reason that
> > > I'm not seeing preventing you for doing something like the untested
> > > code below?
> > >
> > >   func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
> > >       if s.HyperV != nil {
> > >           type union struct {
> > >               Discriminator string                      `json:"type"`
> > >               HyperV        GuestPanicInformationHyperV `json:"hyper-v"`
> > >           }
> > >           tmp := union {
> > >               Discriminator: "hyper-v",
> > >               HyperV:        s.HyperV,
> > >           }
> > >           return json.Marshal(tmp)
> > >       } else if s.S390 != nil {
> > >           type union struct {
> > >               Discriminator string                      `json:"type"`
> > >               S390          GuestPanicInformationHyperV `json:"s390"`
> > >           }
> > >           tmp := union {
> > >               Discriminator: "s390",
> > >               S390:          s.S390,
> > >           }
> > >           return json.Marshal(tmp)
> > >       }
> > >       return nil, errors.New("...")
> > >   }
> >
> > Using these dummy structs is the way I've approached the
> > discriminated union issue in the libvirt Golang XML bindings
> > and it works well. It is the bit I like the least, but it was
> > the lesser of many evils, and on the plus side in the QEMU case
> > it'll be auto-generated code.
> 
> It appears to be the standard way to approach the problem in Go. It
> sort of comes naturally given how the APIs for marshal/unmarshal have
> been defined.
> 
> > > > func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
> > > >     type Alias GuestPanicInformation
> > > >     peek := struct {
> > > >         Alias
> > > >         Driver string `json:"type"`
> > > >     }{}
> > > >
> > > >     if err := json.Unmarshal(data, &peek); err != nil {
> > > >         return err
> > > >     }
> > > >     *s = GuestPanicInformation(peek.Alias)
> > > >
> > > >     switch peek.Driver {
> > > >
> > > >     case "hyper-v":
> > > >         s.HyperV = new(GuestPanicInformationHyperV)
> > > >         if err := json.Unmarshal(data, s.HyperV); err != nil {
> > > >             s.HyperV = nil
> > > >             return err
> > > >         }
> > > >     case "s390":
> > > >         s.S390 = new(GuestPanicInformationS390)
> > > >         if err := json.Unmarshal(data, s.S390); err != nil {
> > > >             s.S390 = nil
> > > >             return err
> > > >         }
> > > >     }
> > > >     // Unrecognizer drivers are silently ignored.
> > > >     return nil
> > >
> > > This looks pretty reasonable, but since you're only using "peek" to
> > > look at the discriminator you should be able to leave out the Alias
> > > type entirely and perform the initial Unmarshal operation while
> > > ignoring all other fields.
> >
> > Once you've defined the dummy structs for the Marshall case
> > though, you might as well use them for Unmarshall too, so you're
> > not parsing the JSON twice.
> 
> You're right, that is undesirable. What about something like this?
> 
>   type GuestPanicInformation struct {
>       HyperV *GuestPanicInformationHyperV
>       S390   *GuestPanicInformationS390
>   }
> 
>   type jsonGuestPanicInformation struct {
>       Discriminator string                       `json:"type"`
>       HyperV        *GuestPanicInformationHyperV `json:"hyper-v"`
>       S390          *GuestPanicInformationS390   `json:"s390"`
>   }

It can possibly be even simpler with just embedding the real
struct

   type jsonGuestPanicInformation struct {
       Discriminator string
       GuestPanicInformation
   }

> 
>   func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
>       if (s.HyperV != nil && s.S390 != nil) ||
>           (s.HyperV == nil && s.S390 == nil) {
>           // client hasn't filled in the struct properly
>           return nil, errors.New("...")
>       }
> 
>       tmp := jsonGuestPanicInformation{}
> 
>       if s.HyperV != nil {
>           tmp.Discriminator = "hyper-v"
>           tmp.HyperV = s.HyperV
>       } else if s.S390 != nil {
>           tmp.Discriminator = "s390"
>           tmp.S390 = s.S390
>       }
> 
>       return json.Marshal(tmp)
>   }

And...

       var discriminator string
       if s.HyperV != nil {
           discriminator = "hyper-v"
       } else if s.S390 != nil {
           discriminator = "s390"
       }

       tmp := jsonGuestPanicInformation{ discriminator, s}
       return json.Marshal(tmp)

> 
>   func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
>       tmp := jsonGuestPanicInformation{}
> 
>       err := json.Unmarshal(data, &tmp)
>       if err != nil {
>           return err
>       }
> 
>       switch tmp.Discriminator {
>       case "hyper-v":
>           if tmp.HyperV == nil {
>               return errors.New("...")
>           }
>           s.HyperV = tmp.HyperV
>       case "s390":
>           if tmp.S390 == nil {
>               return errors.New("...")
>           }
>           s.S390 = tmp.S390
>       }

I'm not actually sure this works, because the first json.Unmarshal
call won't know which branch to try unmarhsalling. So it might be
unavoidable to parse twice.  With the XML parser this wouldn't be
a problem as it has separated the parse phase and then fills the
struct after.

>       // if we hit none of the cases above, that means the
>       // server has produced a variant we don't know about
> 
>       return nil
>   }
> 
> This avoid parsing the JSON twice as well as having to define
> multiple dummy structs, which keeps the code shorter and more
> readable.
> 
> I've also thrown in some additional error checking for good measure,
> ensuring that we abort when the input is completely nonsensical from
> a semantical standpoint.

With regards,
Daniel
Daniel P. Berrangé July 6, 2022, 9:48 a.m. UTC | #5
On Wed, Jul 06, 2022 at 10:37:54AM +0100, Daniel P. Berrangé wrote:
> On Wed, Jul 06, 2022 at 04:28:16AM -0500, Andrea Bolognani wrote:
> > On Tue, Jul 05, 2022 at 05:35:26PM +0100, Daniel P. Berrangé wrote:
> > > On Tue, Jul 05, 2022 at 08:45:30AM -0700, Andrea Bolognani wrote:
> > > > All this string manipulation looks sketchy. Is there some reason that
> > > > I'm not seeing preventing you for doing something like the untested
> > > > code below?
> > > >
> > > >   func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
> > > >       if s.HyperV != nil {
> > > >           type union struct {
> > > >               Discriminator string                      `json:"type"`
> > > >               HyperV        GuestPanicInformationHyperV `json:"hyper-v"`
> > > >           }
> > > >           tmp := union {
> > > >               Discriminator: "hyper-v",
> > > >               HyperV:        s.HyperV,
> > > >           }
> > > >           return json.Marshal(tmp)
> > > >       } else if s.S390 != nil {
> > > >           type union struct {
> > > >               Discriminator string                      `json:"type"`
> > > >               S390          GuestPanicInformationHyperV `json:"s390"`
> > > >           }
> > > >           tmp := union {
> > > >               Discriminator: "s390",
> > > >               S390:          s.S390,
> > > >           }
> > > >           return json.Marshal(tmp)
> > > >       }
> > > >       return nil, errors.New("...")
> > > >   }
> > >
> > > Using these dummy structs is the way I've approached the
> > > discriminated union issue in the libvirt Golang XML bindings
> > > and it works well. It is the bit I like the least, but it was
> > > the lesser of many evils, and on the plus side in the QEMU case
> > > it'll be auto-generated code.
> > 
> > It appears to be the standard way to approach the problem in Go. It
> > sort of comes naturally given how the APIs for marshal/unmarshal have
> > been defined.
> > 
> > > > > func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
> > > > >     type Alias GuestPanicInformation
> > > > >     peek := struct {
> > > > >         Alias
> > > > >         Driver string `json:"type"`
> > > > >     }{}
> > > > >
> > > > >     if err := json.Unmarshal(data, &peek); err != nil {
> > > > >         return err
> > > > >     }
> > > > >     *s = GuestPanicInformation(peek.Alias)
> > > > >
> > > > >     switch peek.Driver {
> > > > >
> > > > >     case "hyper-v":
> > > > >         s.HyperV = new(GuestPanicInformationHyperV)
> > > > >         if err := json.Unmarshal(data, s.HyperV); err != nil {
> > > > >             s.HyperV = nil
> > > > >             return err
> > > > >         }
> > > > >     case "s390":
> > > > >         s.S390 = new(GuestPanicInformationS390)
> > > > >         if err := json.Unmarshal(data, s.S390); err != nil {
> > > > >             s.S390 = nil
> > > > >             return err
> > > > >         }
> > > > >     }
> > > > >     // Unrecognizer drivers are silently ignored.
> > > > >     return nil
> > > >
> > > > This looks pretty reasonable, but since you're only using "peek" to
> > > > look at the discriminator you should be able to leave out the Alias
> > > > type entirely and perform the initial Unmarshal operation while
> > > > ignoring all other fields.
> > >
> > > Once you've defined the dummy structs for the Marshall case
> > > though, you might as well use them for Unmarshall too, so you're
> > > not parsing the JSON twice.
> > 
> > You're right, that is undesirable. What about something like this?
> > 
> >   type GuestPanicInformation struct {
> >       HyperV *GuestPanicInformationHyperV
> >       S390   *GuestPanicInformationS390
> >   }
> > 
> >   type jsonGuestPanicInformation struct {
> >       Discriminator string                       `json:"type"`
> >       HyperV        *GuestPanicInformationHyperV `json:"hyper-v"`
> >       S390          *GuestPanicInformationS390   `json:"s390"`
> >   }
> 
> It can possibly be even simpler with just embedding the real
> struct
> 
>    type jsonGuestPanicInformation struct {
>        Discriminator string
>        GuestPanicInformation
>    }
> 
> > 
> >   func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
> >       if (s.HyperV != nil && s.S390 != nil) ||
> >           (s.HyperV == nil && s.S390 == nil) {
> >           // client hasn't filled in the struct properly
> >           return nil, errors.New("...")
> >       }
> > 
> >       tmp := jsonGuestPanicInformation{}
> > 
> >       if s.HyperV != nil {
> >           tmp.Discriminator = "hyper-v"
> >           tmp.HyperV = s.HyperV
> >       } else if s.S390 != nil {
> >           tmp.Discriminator = "s390"
> >           tmp.S390 = s.S390
> >       }
> > 
> >       return json.Marshal(tmp)
> >   }
> 
> And...
> 
>        var discriminator string
>        if s.HyperV != nil {
>            discriminator = "hyper-v"
>        } else if s.S390 != nil {
>            discriminator = "s390"
>        }
> 
>        tmp := jsonGuestPanicInformation{ discriminator, s}
>        return json.Marshal(tmp)
> 
> > 
> >   func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
> >       tmp := jsonGuestPanicInformation{}
> > 
> >       err := json.Unmarshal(data, &tmp)
> >       if err != nil {
> >           return err
> >       }
> > 
> >       switch tmp.Discriminator {
> >       case "hyper-v":
> >           if tmp.HyperV == nil {
> >               return errors.New("...")
> >           }
> >           s.HyperV = tmp.HyperV
> >       case "s390":
> >           if tmp.S390 == nil {
> >               return errors.New("...")
> >           }
> >           s.S390 = tmp.S390
> >       }
> 
> I'm not actually sure this works, because the first json.Unmarshal
> call won't know which branch to try unmarhsalling. So it might be
> unavoidable to parse twice.  With the XML parser this wouldn't be
> a problem as it has separated the parse phase and then fills the
> struct after.

Right afer sending, I remember how this is supposed to be done. It
involves use of 'json.RawMessage' eg examples at:

  https://pkg.go.dev/encoding/json#example-RawMessage-Unmarshal

So it would look like:

   type GuestPanicInformation struct {
       HyperV *GuestPanicInformationHyperV
       S390   *GuestPanicInformationS390
   }
 
   type jsonGuestPanicInformation struct {
       Discriminator string   `json:"type"`
       Payload *json.RawMessage
   }


    func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
        var p *json.RawMesage
        var err error
        if s.HyperV != nil {
            d = "hyper-v"
            p, err = json.Marshal(s.HyperV)
        } else if s.S390 != nil {
            d = "s390"
            p, err = json.Marshal(s.S390)
        } else {
	    err = fmt.Errorf("No payload defined")
	}
        if err != nil {
            return []byte{}, err
        }
  
        return json.Marshal(jsonGuestPanicInformation{d, p}), nil
    }


 
   func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
       tmp := jsonGuestPanicInformation{}
 
       err := json.Unmarshal(data, &tmp)
       if err != nil {
           return err
       }
 
       switch tmp.Discriminator {
       case "hyper-v":
           s.HyperV := GuestPanicInformationHyperV{}
           err := json.Unmarshal(tmp.Payload, s.HyperV)
           if err != nil {
              return err
           }
       case "s390":
           s.S390 := GuestPanicInformationS390{}
           err := json.Unmarshal(tmp.Payload, s.S390)
           if err != nil {
              return err
           }
       }

       return fmt.Errorf("Unknown type '%s'", tmp.Discriminator)
  }

With regards,
Daniel
Andrea Bolognani July 6, 2022, 12:20 p.m. UTC | #6
On Wed, Jul 06, 2022 at 10:48:06AM +0100, Daniel P. Berrangé wrote:
> On Wed, Jul 06, 2022 at 10:37:54AM +0100, Daniel P. Berrangé wrote:
> > On Wed, Jul 06, 2022 at 04:28:16AM -0500, Andrea Bolognani wrote:
> > >   func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
> > >       tmp := jsonGuestPanicInformation{}
> > >
> > >       err := json.Unmarshal(data, &tmp)
> > >       if err != nil {
> > >           return err
> > >       }
> > >
> > >       switch tmp.Discriminator {
> > >       case "hyper-v":
> > >           if tmp.HyperV == nil {
> > >               return errors.New("...")
> > >           }
> > >           s.HyperV = tmp.HyperV
> > >       case "s390":
> > >           if tmp.S390 == nil {
> > >               return errors.New("...")
> > >           }
> > >           s.S390 = tmp.S390
> > >       }
> >
> > I'm not actually sure this works, because the first json.Unmarshal
> > call won't know which branch to try unmarhsalling. So it might be
> > unavoidable to parse twice.  With the XML parser this wouldn't be
> > a problem as it has separated the parse phase and then fills the
> > struct after.

It does, because the struct it's filling with data
(jsonGuestPanicInformation) covers all possible cases. We then pick
only the part we care about and transfer it to the user-provided
return location.

> Right afer sending, I remember how this is supposed to be done. It
> involves use of 'json.RawMessage' eg examples at:
>
>   https://pkg.go.dev/encoding/json#example-RawMessage-Unmarshal
>
> So it would look like:
>
>    type GuestPanicInformation struct {
>        HyperV *GuestPanicInformationHyperV
>        S390   *GuestPanicInformationS390
>    }
>
>    type jsonGuestPanicInformation struct {
>        Discriminator string   `json:"type"`
>        Payload *json.RawMessage
>    }
>
>     func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
>         var p *json.RawMesage
>         var err error
>         if s.HyperV != nil {
>             d = "hyper-v"
>             p, err = json.Marshal(s.HyperV)
>         } else if s.S390 != nil {
>             d = "s390"
>             p, err = json.Marshal(s.S390)
>         } else {
> 	    err = fmt.Errorf("No payload defined")
> 	}
>         if err != nil {
>             return []byte{}, err
>         }
>
>         return json.Marshal(jsonGuestPanicInformation{d, p}), nil
>     }
>
>    func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
>        tmp := jsonGuestPanicInformation{}
>
>        err := json.Unmarshal(data, &tmp)
>        if err != nil {
>            return err
>        }
>
>        switch tmp.Discriminator {
>        case "hyper-v":
>            s.HyperV := GuestPanicInformationHyperV{}
>            err := json.Unmarshal(tmp.Payload, s.HyperV)
>            if err != nil {
>               return err
>            }
>        case "s390":
>            s.S390 := GuestPanicInformationS390{}
>            err := json.Unmarshal(tmp.Payload, s.S390)
>            if err != nil {
>               return err
>            }
>        }
>
>        return fmt.Errorf("Unknown type '%s'", tmp.Discriminator)
>   }

I guess this version would work too, but I think it might still
perform more computation than the one I suggested?

json.RawMessage is a type alias for []byte, so I think the first call
to json.Unmarshal will have to go through the message to figure out
its structure and the contents of the discriminator field, then
tweak the original input so that only the part that hasn't been
consumed yet is returned. The second call to json.Marshal will then
parse that part, which means that the "inner" chunk of JSON ends up
being processed twice. In the approach I suggested, you go over the
entire JSON in one go.

Things might even out when you take into account allocating and
copying data around though. I'm not particularly interested in
splitting hair when it comes to the most efficient approach at this
point in time :) Knowing that we're not going through the entire JSON
twice is good enough.
Victor Toso Aug. 17, 2022, 4:06 p.m. UTC | #7
Hi,

On Wed, Jul 06, 2022 at 04:28:16AM -0500, Andrea Bolognani wrote:
> On Tue, Jul 05, 2022 at 05:35:26PM +0100, Daniel P. Berrangé wrote:
> > On Tue, Jul 05, 2022 at 08:45:30AM -0700, Andrea Bolognani wrote:
> > > All this string manipulation looks sketchy. Is there some reason that
> > > I'm not seeing preventing you for doing something like the untested
> > > code below?
> > >
> > >   func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
> > >       if s.HyperV != nil {
> > >           type union struct {
> > >               Discriminator string                      `json:"type"`
> > >               HyperV        GuestPanicInformationHyperV `json:"hyper-v"`
> > >           }
> > >           tmp := union {
> > >               Discriminator: "hyper-v",
> > >               HyperV:        s.HyperV,
> > >           }
> > >           return json.Marshal(tmp)
> > >       } else if s.S390 != nil {
> > >           type union struct {
> > >               Discriminator string                      `json:"type"`
> > >               S390          GuestPanicInformationHyperV `json:"s390"`
> > >           }
> > >           tmp := union {
> > >               Discriminator: "s390",
> > >               S390:          s.S390,
> > >           }
> > >           return json.Marshal(tmp)
> > >       }
> > >       return nil, errors.New("...")
> > >   }
> >
> > Using these dummy structs is the way I've approached the
> > discriminated union issue in the libvirt Golang XML bindings
> > and it works well. It is the bit I like the least, but it was
> > the lesser of many evils, and on the plus side in the QEMU case
> > it'll be auto-generated code.
>
> It appears to be the standard way to approach the problem in Go. It
> sort of comes naturally given how the APIs for marshal/unmarshal have
> been defined.

Yep, string manipulation was bad choice. Some sort of anonymous
struct is a better fit. So I'll be changing this ...

> > > > func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
> > > >     type Alias GuestPanicInformation
> > > >     peek := struct {
> > > >         Alias
> > > >         Driver string `json:"type"`
> > > >     }{}
> > > >
> > > >     if err := json.Unmarshal(data, &peek); err != nil {
> > > >         return err
> > > >     }
> > > >     *s = GuestPanicInformation(peek.Alias)
> > > >
> > > >     switch peek.Driver {
> > > >
> > > >     case "hyper-v":
> > > >         s.HyperV = new(GuestPanicInformationHyperV)
> > > >         if err := json.Unmarshal(data, s.HyperV); err != nil {
> > > >             s.HyperV = nil
> > > >             return err
> > > >         }
> > > >     case "s390":
> > > >         s.S390 = new(GuestPanicInformationS390)
> > > >         if err := json.Unmarshal(data, s.S390); err != nil {
> > > >             s.S390 = nil
> > > >             return err
> > > >         }
> > > >     }
> > > >     // Unrecognizer drivers are silently ignored.
> > > >     return nil
> > >
> > > This looks pretty reasonable, but since you're only using "peek" to
> > > look at the discriminator you should be able to leave out the Alias
> > > type entirely and perform the initial Unmarshal operation while
> > > ignoring all other fields.
> >
> > Once you've defined the dummy structs for the Marshall case
> > though, you might as well use them for Unmarshall too, so you're
> > not parsing the JSON twice.
>
> You're right, that is undesirable. What about something like this?
>
>   type GuestPanicInformation struct {
>       HyperV *GuestPanicInformationHyperV
>       S390   *GuestPanicInformationS390
>   }
>
>   type jsonGuestPanicInformation struct {
>       Discriminator string                       `json:"type"`
>       HyperV        *GuestPanicInformationHyperV `json:"hyper-v"`
>       S390          *GuestPanicInformationS390   `json:"s390"`
>   }

I didn't test this so I could be wrong but, I think this should
not work in case you want to remove the string manipulation.

The marshalling of either HyperV or S390 fields would return a
JSON Object but QAPI spec expects the fields at the same level as
the discriminator's type [0]. So, with this you still need some
string manipulation to remove the extra {}, like I did poorly
without any comment 0:-)

[0] https://gitlab.com/qemu-project/qemu/-/blob/master/docs/devel/qapi-code-gen.rst?plain=1#L358

>   func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
>       if (s.HyperV != nil && s.S390 != nil) ||
>           (s.HyperV == nil && s.S390 == nil) {
>           // client hasn't filled in the struct properly
>           return nil, errors.New("...")
>       }
>
>       tmp := jsonGuestPanicInformation{}
>
>       if s.HyperV != nil {
>           tmp.Discriminator = "hyper-v"
>           tmp.HyperV = s.HyperV
>       } else if s.S390 != nil {
>           tmp.Discriminator = "s390"
>           tmp.S390 = s.S390
>       }
>
>       return json.Marshal(tmp)
>   }
>
>   func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
>       tmp := jsonGuestPanicInformation{}
>
>       err := json.Unmarshal(data, &tmp)
>       if err != nil {
>           return err
>       }
>
>       switch tmp.Discriminator {
>       case "hyper-v":
>           if tmp.HyperV == nil {
>               return errors.New("...")
>           }
>           s.HyperV = tmp.HyperV
>       case "s390":
>           if tmp.S390 == nil {
>               return errors.New("...")
>           }
>           s.S390 = tmp.S390
>       }
>       // if we hit none of the cases above, that means the
>       // server has produced a variant we don't know about
>
>       return nil
>   }
>
> This avoid parsing the JSON twice as well as having to define
> multiple dummy structs, which keeps the code shorter and more
> readable.

If not too verbose, I'd still use anonymous structs whenever
possible. They are created for a given scope, they are basically
self documented in that given context and don't polluted
package's namespace.

> I've also thrown in some additional error checking for good
> measure, ensuring that we abort when the input is completely
> nonsensical from a semantical standpoint.

Thanks!
Victor
Victor Toso Aug. 17, 2022, 4:25 p.m. UTC | #8
On Wed, Jul 06, 2022 at 10:48:06AM +0100, Daniel P. Berrangé wrote:
> On Wed, Jul 06, 2022 at 10:37:54AM +0100, Daniel P. Berrangé wrote:
> > On Wed, Jul 06, 2022 at 04:28:16AM -0500, Andrea Bolognani wrote:
> > > You're right, that is undesirable. What about something like this?
> > >
> > >   type GuestPanicInformation struct {
> > >       HyperV *GuestPanicInformationHyperV
> > >       S390   *GuestPanicInformationS390
> > >   }
> > >
> > >   type jsonGuestPanicInformation struct {
> > >       Discriminator string                       `json:"type"`
> > >       HyperV        *GuestPanicInformationHyperV `json:"hyper-v"`
> > >       S390          *GuestPanicInformationS390   `json:"s390"`
> > >   }
> >
> > It can possibly be even simpler with just embedding the real
> > struct
> >
> >    type jsonGuestPanicInformation struct {
> >        Discriminator string
> >        GuestPanicInformation
> >    }

Similar to what I said in previous email (same thread) to Andrea,
this would not work because the end result does not match with
QAPI spec, where HyperV or S390 fields should be at the same
level as 'type'.

If we embed either HyperV or S390, then it should work, like:

    tmp := struct {
        GuestPanicInformationHyperV
        Discriminator string "type"
    }{}

But I intend to try the json.RawMessage too as with description
it seems like we can avoid looking the whole json data twice.

> > >   func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
> > >       if (s.HyperV != nil && s.S390 != nil) ||
> > >           (s.HyperV == nil && s.S390 == nil) {
> > >           // client hasn't filled in the struct properly
> > >           return nil, errors.New("...")
> > >       }
> > > 
> > >       tmp := jsonGuestPanicInformation{}
> > > 
> > >       if s.HyperV != nil {
> > >           tmp.Discriminator = "hyper-v"
> > >           tmp.HyperV = s.HyperV
> > >       } else if s.S390 != nil {
> > >           tmp.Discriminator = "s390"
> > >           tmp.S390 = s.S390
> > >       }
> > > 
> > >       return json.Marshal(tmp)
> > >   }
> > 
> > And...
> > 
> >        var discriminator string
> >        if s.HyperV != nil {
> >            discriminator = "hyper-v"
> >        } else if s.S390 != nil {
> >            discriminator = "s390"
> >        }
> > 
> >        tmp := jsonGuestPanicInformation{ discriminator, s}
> >        return json.Marshal(tmp)
> > 
> > > 
> > >   func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
> > >       tmp := jsonGuestPanicInformation{}
> > > 
> > >       err := json.Unmarshal(data, &tmp)
> > >       if err != nil {
> > >           return err
> > >       }
> > > 
> > >       switch tmp.Discriminator {
> > >       case "hyper-v":
> > >           if tmp.HyperV == nil {
> > >               return errors.New("...")
> > >           }
> > >           s.HyperV = tmp.HyperV
> > >       case "s390":
> > >           if tmp.S390 == nil {
> > >               return errors.New("...")
> > >           }
> > >           s.S390 = tmp.S390
> > >       }
> >
> > I'm not actually sure this works, because the first json.Unmarshal
> > call won't know which branch to try unmarhsalling. So it might be
> > unavoidable to parse twice.  With the XML parser this wouldn't be
> > a problem as it has separated the parse phase and then fills the
> > struct after.
> 
> Right afer sending, I remember how this is supposed to be done. It
> involves use of 'json.RawMessage' eg examples at:
> 
>   https://pkg.go.dev/encoding/json#example-RawMessage-Unmarshal
> 
> So it would look like:
> 
>    type GuestPanicInformation struct {
>        HyperV *GuestPanicInformationHyperV
>        S390   *GuestPanicInformationS390
>    }
>  
>    type jsonGuestPanicInformation struct {
>        Discriminator string   `json:"type"`
>        Payload *json.RawMessage
>    }
> 
> 
>     func (s GuestPanicInformation) MarshalJSON() ([]byte, error) {
>         var p *json.RawMesage
>         var err error
>         if s.HyperV != nil {
>             d = "hyper-v"
>             p, err = json.Marshal(s.HyperV)
>         } else if s.S390 != nil {
>             d = "s390"
>             p, err = json.Marshal(s.S390)
>         } else {
> 	    err = fmt.Errorf("No payload defined")
> 	}
>         if err != nil {
>             return []byte{}, err
>         }
>   
>         return json.Marshal(jsonGuestPanicInformation{d, p}), nil
>     }
> 
> 
>  
>    func (s *GuestPanicInformation) UnmarshalJSON(data []byte) error {
>        tmp := jsonGuestPanicInformation{}
>  
>        err := json.Unmarshal(data, &tmp)
>        if err != nil {
>            return err
>        }
>  
>        switch tmp.Discriminator {
>        case "hyper-v":
>            s.HyperV := GuestPanicInformationHyperV{}
>            err := json.Unmarshal(tmp.Payload, s.HyperV)
>            if err != nil {
>               return err
>            }
>        case "s390":
>            s.S390 := GuestPanicInformationS390{}
>            err := json.Unmarshal(tmp.Payload, s.S390)
>            if err != nil {
>               return err
>            }
>        }
> 
>        return fmt.Errorf("Unknown type '%s'", tmp.Discriminator)
>   }
> 
> With regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Cheers,
Victor
Victor Toso Aug. 19, 2022, 7:20 a.m. UTC | #9
Hi,

On Wed, Aug 17, 2022 at 06:25:56PM +0200, Victor Toso wrote:
> On Wed, Jul 06, 2022 at 10:48:06AM +0100, Daniel P. Berrangé wrote:
> > On Wed, Jul 06, 2022 at 10:37:54AM +0100, Daniel P. Berrangé wrote:
> > > On Wed, Jul 06, 2022 at 04:28:16AM -0500, Andrea Bolognani wrote:
> > > > You're right, that is undesirable. What about something like this?
> > > >
> > > >   type GuestPanicInformation struct {
> > > >       HyperV *GuestPanicInformationHyperV
> > > >       S390   *GuestPanicInformationS390
> > > >   }
> > > >
> > > >   type jsonGuestPanicInformation struct {
> > > >       Discriminator string                       `json:"type"`
> > > >       HyperV        *GuestPanicInformationHyperV `json:"hyper-v"`
> > > >       S390          *GuestPanicInformationS390   `json:"s390"`
> > > >   }
> > >
> > > It can possibly be even simpler with just embedding the real
> > > struct
> > >
> > >    type jsonGuestPanicInformation struct {
> > >        Discriminator string
> > >        GuestPanicInformation
> > >    }
>
> Similar to what I said in previous email (same thread) to Andrea,
> this would not work because the end result does not match with
> QAPI spec, where HyperV or S390 fields should be at the same
> level as 'type'.
>
> If we embed either HyperV or S390, then it should work, like:
>
>     tmp := struct {
>         GuestPanicInformationHyperV
>         Discriminator string "type"
>     }{}
>
> But I intend to try the json.RawMessage too as with description
> it seems like we can avoid looking the whole json data twice.

For the same reason, I could not use json.RawMessage, sadly.

As Andrea pointed out before, json.RawMessage is just an alias
for []byte. We need a field for that (so, it can't be embed) and
the contents of the JSON need to match that field's name.

I've removed the string manipulation and used a few anonymous
structs instead. I'm attaching a main.go program that tests this
behavior together with input checks that Andrea suggested. This
is more or less how the generated code will look like in the next
iteration but in case one want's to find a nicer/shorter
solution, I'm all ears :)

Cheers,
Victor
package main

import (
	"encoding/json"
	"errors"
	"fmt"
	"strings"
)

type QCryptoBlockOptionsQCow struct {
	KeySecret *string `json:"key-secret,omitempty"`
}

type QCryptoBlockOptionsLUKS struct {
	KeySecret *string `json:"key-secret,omitempty"`
}

type QCryptoBlockOpenOptions struct {
	// Variants fields
	Qcow *QCryptoBlockOptionsQCow `json:"-"`
	Luks *QCryptoBlockOptionsLUKS `json:"-"`
}

func (s QCryptoBlockOpenOptions) MarshalJSON() ([]byte, error) {
	var bytes []byte
	var err error
	if s.Qcow != nil {
		tmp := struct {
			QCryptoBlockOptionsQCow
			Discriminator string `json:"format"`
		}{
			QCryptoBlockOptionsQCow: *s.Qcow,
			Discriminator:           "qcow",
		}
		if bytes, err = json.Marshal(tmp); err != nil {
			return nil, err
		}
	}
	if s.Luks != nil {
		if len(bytes) != 0 {
			return nil, errors.New(`multiple fields set for QCryptoBlockOpenOptions`)
		}
		tmp := struct {
			QCryptoBlockOptionsLUKS
			Discriminator string `json:"format"`
		}{
			QCryptoBlockOptionsLUKS: *s.Luks,
			Discriminator:           "luks",
		}
		if bytes, err = json.Marshal(tmp); err != nil {
			return nil, err
		}
	}
	if len(bytes) == 0 {
		return nil, errors.New(`null not supported for QCryptoBlockOpenOptions`)
	}
	return bytes, nil
}

func (s *QCryptoBlockOpenOptions) UnmarshalJSON(data []byte) error {
	tmp := struct {
		Discriminator string `json:"format"`
	}{}
	if err := json.Unmarshal(data, &tmp); err != nil {
		return err
	}
	switch tmp.Discriminator {
	case "qcow":
		s.Qcow = &QCryptoBlockOptionsQCow{}
		if err := json.Unmarshal(data, s.Qcow); err != nil {
			s.Qcow = nil
			return err
		}
	case "luks":
		s.Luks = &QCryptoBlockOptionsLUKS{}
		if err := json.Unmarshal(data, s.Luks); err != nil {
			s.Luks = nil
			return err
		}
	}
	return nil
}

func main() {
	jsonLuks := `{"key-secret":"my luks secret is here","format":"luks"}`
	jsonQcow := `{"key-secret":"my qcow secret is here","format":"qcow"}`
	r := QCryptoBlockOpenOptions{}
	if err := json.Unmarshal([]byte(jsonLuks), &r); err != nil {
		panic(err)
	} else if r.Luks == nil || r.Qcow != nil {
		panic(fmt.Sprintf("Wrong: %v", r))
	} else if b, err := json.Marshal(&r); err != nil {
		panic(err)
	} else if string(b) != jsonLuks {
		panic(string(b))
	}

	r = QCryptoBlockOpenOptions{}
	if err := json.Unmarshal([]byte(jsonQcow), &r); err != nil {
		panic(err)
	} else if r.Luks != nil || r.Qcow == nil {
		panic(fmt.Sprintf("Wrong: %v", r))
	} else if b, err := json.Marshal(&r); err != nil {
		panic(err)
	} else if string(b) != jsonQcow {
		panic(string(b))
	}

	r = QCryptoBlockOpenOptions{}
	if _, err := json.Marshal(&r); err == nil {
		panic("No fields set should be an error")
	} else if !strings.Contains(err.Error(), "null not supported") {
		panic(err)
	}

	qcowSecret := "my-qcow-secret-is-here"
	luksSecret := "my-luks-secret-is-here"
	r = QCryptoBlockOpenOptions{
		Qcow: &QCryptoBlockOptionsQCow{
			KeySecret: &qcowSecret,
		},
		Luks: &QCryptoBlockOptionsLUKS{
			KeySecret: &luksSecret,
		},
	}

	if _, err := json.Marshal(&r); err == nil {
		panic("multiple fields set should be an error")
	} else if !strings.Contains(err.Error(), "multiple fields set for") {
		panic(err)
	}
}
Andrea Bolognani Aug. 19, 2022, 3:25 p.m. UTC | #10
On Fri, Aug 19, 2022 at 09:20:52AM +0200, Victor Toso wrote:
> > > On Wed, Jul 06, 2022 at 10:37:54AM +0100, Daniel P. Berrangé wrote:
> > > > On Wed, Jul 06, 2022 at 04:28:16AM -0500, Andrea Bolognani wrote:
> > > > > You're right, that is undesirable. What about something like this?
> > > > >
> > > > >   type GuestPanicInformation struct {
> > > > >       HyperV *GuestPanicInformationHyperV
> > > > >       S390   *GuestPanicInformationS390
> > > > >   }
> > > > >
> > > > >   type jsonGuestPanicInformation struct {
> > > > >       Discriminator string                       `json:"type"`
> > > > >       HyperV        *GuestPanicInformationHyperV `json:"hyper-v"`
> > > > >       S390          *GuestPanicInformationS390   `json:"s390"`
> > > > >   }
> > > >
> > > > It can possibly be even simpler with just embedding the real
> > > > struct
> > > >
> > > >    type jsonGuestPanicInformation struct {
> > > >        Discriminator string
> > > >        GuestPanicInformation
> > > >    }
> >
> > Similar to what I said in previous email (same thread) to Andrea,
> > this would not work because the end result does not match with
> > QAPI spec, where HyperV or S390 fields should be at the same
> > level as 'type'.

Yeah, you're absolutely correct, I was misreading the schema and
suggested an implementation that couldn't possibly work.

> > If we embed either HyperV or S390, then it should work, like:
> >
> >     tmp := struct {
> >         GuestPanicInformationHyperV
> >         Discriminator string "type"
> >     }{}
>
> For the same reason, I could not use json.RawMessage, sadly.
>
> As Andrea pointed out before, json.RawMessage is just an alias
> for []byte. We need a field for that (so, it can't be embed) and
> the contents of the JSON need to match that field's name.

Right. It would work if unions looked like

  { "type": "...", "data": { ... }}

with the "data" part having different fields based on the value of
"type", but not for the flat JSON dictionaries that are used for QAPI
unions.

> func (s QCryptoBlockOpenOptions) MarshalJSON() ([]byte, error) {
> 	var bytes []byte
> 	var err error
> 	if s.Qcow != nil {
> 		tmp := struct {
> 			QCryptoBlockOptionsQCow
> 			Discriminator string `json:"format"`
> 		}{
> 			QCryptoBlockOptionsQCow: *s.Qcow,
> 			Discriminator:           "qcow",
> 		}
> 		if bytes, err = json.Marshal(tmp); err != nil {
> 			return nil, err
> 		}
> 	}
> 	if s.Luks != nil {
> 		if len(bytes) != 0 {
> 			return nil, errors.New(`multiple fields set for QCryptoBlockOpenOptions`)
> 		}

Why are you checking this here? I would just have a check like

  if s.Qcow != nil && s.Luks != nil {
      return nil, errors.New(`multiple fields set for QCryptoBlockOpenOptions`)
  }

at the top of the function, so that you can abort early and cheaply
if the user has provided invalid input instead of having to wait
after one of the fields has already been serialized.

> 		tmp := struct {
> 			QCryptoBlockOptionsLUKS
> 			Discriminator string `json:"format"`
> 		}{
> 			QCryptoBlockOptionsLUKS: *s.Luks,
> 			Discriminator:           "luks",
> 		}
> 		if bytes, err = json.Marshal(tmp); err != nil {
> 			return nil, err
> 		}
> 	}
> 	if len(bytes) == 0 {
> 		return nil, errors.New(`null not supported for QCryptoBlockOpenOptions`)
> 	}

Similarly, this should be checked early. So the complete check could
be turned into

  if (s.Qcow != nil && s.Luks != nil) || (s.Qcow == nil && s.Luks == nil) {
      return nil, errors.New("must set exactly one field")
  }

You should have enough information to produce such a check when
generating the function, right? If making sure all possible
combinations are covered up ends up being too complicated, something
like

  var setFields int
  if s.Field1 != nil {
      setFields += 1
  }
  if s.Field2 != nil {
      setFields += 1
  }
  // ...
  if setFields != 1 {
      return nil, errors.New("must set exactly one field")
  }

would also work.

> func (s *QCryptoBlockOpenOptions) UnmarshalJSON(data []byte) error {
> 	tmp := struct {
> 		Discriminator string `json:"format"`
> 	}{}
> 	if err := json.Unmarshal(data, &tmp); err != nil {
> 		return err
> 	}
> 	switch tmp.Discriminator {
> 	case "qcow":
> 		s.Qcow = &QCryptoBlockOptionsQCow{}
> 		if err := json.Unmarshal(data, s.Qcow); err != nil {
> 			s.Qcow = nil
> 			return err
> 		}
> 	case "luks":
> 		s.Luks = &QCryptoBlockOptionsLUKS{}
> 		if err := json.Unmarshal(data, s.Luks); err != nil {
> 			s.Luks = nil
> 			return err
> 		}
> 	}
> 	return nil
> }

Alternatively, you could define a struct that covers all possible
fields and deserialize everything in one go, then copy the various
parts over to the appropriate struct:

  func (s *QCryptoBlockOpenOptions) UnmarshalJSON(data []byte) error {
      tmp := struct {
          Discriminator string  `json:"format"`
          KeySecret     *string `json:"key-secret,omitempty"`
      }{}
      if err := json.Unmarshal(data, &tmp); err != nil {
          return err
      }
      switch tmp.Discriminator {
      case "qcow":
          s.Qcow = &QCryptoBlockOptionsQCow{}
          s.Qcow.KeySecret = tmp.KeySecret
      case "luks":
          s.Luks = &QCryptoBlockOptionsLUKS{}
          s.Luks.KeySecret = tmp.KeySecret
      }
      return nil
  }

Honestly I'm unclear on which option is better. Parsing the JSON
twice, as you're doing in your version, could be problematic when the
document is large; on the other hand, my approach will probably
result in more copies of the same data being created.
Victor Toso Aug. 22, 2022, 6:33 a.m. UTC | #11
Hi,

On Fri, Aug 19, 2022 at 10:25:26AM -0500, Andrea Bolognani wrote:
> > func (s QCryptoBlockOpenOptions) MarshalJSON() ([]byte, error) {
> > 	var bytes []byte
> > 	var err error
> > 	if s.Qcow != nil {
> > 		tmp := struct {
> > 			QCryptoBlockOptionsQCow
> > 			Discriminator string `json:"format"`
> > 		}{
> > 			QCryptoBlockOptionsQCow: *s.Qcow,
> > 			Discriminator:           "qcow",
> > 		}
> > 		if bytes, err = json.Marshal(tmp); err != nil {
> > 			return nil, err
> > 		}
> > 	}
> > 	if s.Luks != nil {
> > 		if len(bytes) != 0 {
> > 			return nil, errors.New(`multiple fields set for QCryptoBlockOpenOptions`)
> > 		}
>
> Why are you checking this here? I would just have a check like
>
>   if s.Qcow != nil && s.Luks != nil {
>       return nil, errors.New(`multiple fields set for QCryptoBlockOpenOptions`)
>   }
>
> at the top of the function, so that you can abort early and
> cheaply if the user has provided invalid input instead of
> having to wait after one of the fields has already been
> serialized.

In general, I too prefer to return early! So I agree with you
that it is nicer. At the same time, I was thinking a bit from the
code generator point of view and the overall expected diffs when
generating new code.  More below.

> > 		tmp := struct {
> > 			QCryptoBlockOptionsLUKS
> > 			Discriminator string `json:"format"`
> > 		}{
> > 			QCryptoBlockOptionsLUKS: *s.Luks,
> > 			Discriminator:           "luks",
> > 		}
> > 		if bytes, err = json.Marshal(tmp); err != nil {
> > 			return nil, err
> > 		}
> > 	}
> > 	if len(bytes) == 0 {
> > 		return nil, errors.New(`null not supported for QCryptoBlockOpenOptions`)
> > 	}
>
> Similarly, this should be checked early. So the complete check
> could be turned into
>
>   if (s.Qcow != nil && s.Luks != nil) || (s.Qcow == nil && s.Luks == nil) {
>       return nil, errors.New("must set exactly one field")
>   }

Main problem with this approach is that there is some big unions
like BlockdevOptions, this means that we would have to repeat all
fields by the number fields times (40+ in BlockdevOptions' case).

> You should have enough information to produce such a check when
> generating the function, right?

We do!

> If making sure all possible combinations are covered up ends up
> being too complicated, something
> like
>
>   var setFields int
>   if s.Field1 != nil {
>       setFields += 1
>   }
>   if s.Field2 != nil {
>       setFields += 1
>   }
>   // ...
>   if setFields != 1 {
>       return nil, errors.New("must set exactly one field")
>   }
>
> would also work.

I started with this approach actually but then I realized that we
can just keep the if checks and instead of counter, do check the
variable bytes []byte. So, do you think that the counter is
actually preferred instead of inner check? I don't mind changing
it.

> > func (s *QCryptoBlockOpenOptions) UnmarshalJSON(data []byte) error {
> > 	tmp := struct {
> > 		Discriminator string `json:"format"`
> > 	}{}
> > 	if err := json.Unmarshal(data, &tmp); err != nil {
> > 		return err
> > 	}
> > 	switch tmp.Discriminator {
> > 	case "qcow":
> > 		s.Qcow = &QCryptoBlockOptionsQCow{}
> > 		if err := json.Unmarshal(data, s.Qcow); err != nil {
> > 			s.Qcow = nil
> > 			return err
> > 		}
> > 	case "luks":
> > 		s.Luks = &QCryptoBlockOptionsLUKS{}
> > 		if err := json.Unmarshal(data, s.Luks); err != nil {
> > 			s.Luks = nil
> > 			return err
> > 		}
> > 	}
> > 	return nil
> > }
>
> Alternatively, you could define a struct that covers all
> possible fields and deserialize everything in one go, then copy
> the various parts over to the appropriate struct:
>
>   func (s *QCryptoBlockOpenOptions) UnmarshalJSON(data []byte) error {
>       tmp := struct {
>           Discriminator string  `json:"format"`
>           KeySecret     *string `json:"key-secret,omitempty"`
>       }{}
>       if err := json.Unmarshal(data, &tmp); err != nil {
>           return err
>       }
>       switch tmp.Discriminator {
>       case "qcow":
>           s.Qcow = &QCryptoBlockOptionsQCow{}
>           s.Qcow.KeySecret = tmp.KeySecret
>       case "luks":
>           s.Luks = &QCryptoBlockOptionsLUKS{}
>           s.Luks.KeySecret = tmp.KeySecret

I think this one adds a bit more complexity and I'm not convinced
that the gains are worth.

For example, with types that differ over fields with the same
name? We would need to move that inside the anonymous struct
somehow;

For example,if Luks's KeySecret was KeyScretType we would have to
use KeySecretLuks *KeySecretType. Still, both of them would
likely be inside of "key-secret" json object (so, same key for
two different fields, not really sure how that would work out)

>       }
>       return nil
>   }
>
> Honestly I'm unclear on which option is better. Parsing the
> JSON twice, as you're doing in your version, could be
> problematic when the document is large; on the other hand, my
> approach will probably result in more copies of the same data
> being created.

It does sound nice to parse it once but if we really want to do
that, we would need to work with Golang's JSON decoder [0]. IMHO,
parsing twice with in-memory data might be okay for the moment
while we are trying to keep things simple and later we could
benchmark against a custom decoder [0] in the future.

[0] https://pkg.go.dev/encoding/json#Decoder

Cheers,
Victor
diff mbox series

Patch

diff --git a/scripts/qapi/golang.py b/scripts/qapi/golang.py
index 1ab0c0bb46..6c6a5cea97 100644
--- a/scripts/qapi/golang.py
+++ b/scripts/qapi/golang.py
@@ -53,7 +53,8 @@  class QAPISchemaGenGolangVisitor(QAPISchemaVisitor):
 
     def __init__(self, prefix: str):
         super().__init__()
-        self.target = {name: "" for name in ["alternate", "enum", "helper", "struct"]}
+        self.target = {name: "" for name in ["alternate", "enum", "helper", "struct",
+                                             "union"]}
         self.objects_seen = {}
         self.schema = None
         self.golang_package_name = "qapi"
@@ -79,10 +80,14 @@  def visit_object_type(self: QAPISchemaGenGolangVisitor,
                           members: List[QAPISchemaObjectTypeMember],
                           variants: Optional[QAPISchemaVariants]
                           ) -> None:
-        # Do not handle anything besides structs
+        # Do not handle anything besides struct and unions.
         if (name == self.schema.the_empty_object_type.name or
                 not isinstance(name, str) or
-                info.defn_meta not in ["struct"]):
+                info.defn_meta not in ["struct", "union"]):
+            return
+
+        # Base structs are embed
+        if qapi_name_is_base(name):
             return
 
         # Safety checks.
@@ -110,6 +115,10 @@  def visit_object_type(self: QAPISchemaGenGolangVisitor,
                                                              base,
                                                              members,
                                                              variants)
+        if info.defn_meta == "union":
+            self.target[info.defn_meta] += qapi_to_golang_methods_union(name,
+                                                                        info,
+                                                                        variants)
 
     def visit_alternate_type(self: QAPISchemaGenGolangVisitor,
                              name: str,
@@ -311,14 +320,99 @@  def qapi_to_golang_struct(name: str,
             # Variant's are handled in the Marshal/Unmarshal methods
             fieldtag = '`json:"-"`'
             fields += f"\t{field} *{member_type}{fieldtag}\n"
-            member_type = qapi_schema_type_to_go_type(var.type.name)
-            # Variant's are handled in the Marshal/Unmarshal methods
-            fieldtag = '`json:"-"`'
-            fields += f"\t{field} *{member_type}{fieldtag}\n"
 
     return generate_struct_type(type_name, fields)
 
 
+def qapi_to_golang_methods_union(name: str,
+                                 info: Optional[QAPISourceInfo],
+                                 variants: Optional[QAPISchemaVariants]
+                                 ) -> str:
+
+    type_name = qapi_to_go_type_name(name, info.defn_meta)
+
+    driverCases = ""
+    checkFields = ""
+    if variants:
+        for var in variants.variants:
+            if var.type.is_implicit():
+                continue
+
+            field = qapi_to_field_name(var.name)
+            member_type = qapi_schema_type_to_go_type(var.type.name)
+
+            if len(checkFields) > 0:
+                checkFields += "\t} else "
+            checkFields += f'''if s.{field} != nil {{
+        driver = "{var.name}"
+        payload, err = json.Marshal(s.{field})
+'''
+            # for Unmarshal method
+            driverCases += f'''
+    case "{var.name}":
+        s.{field} = new({member_type})
+        if err := json.Unmarshal(data, s.{field}); err != nil {{
+            s.{field} = nil
+            return err
+        }}'''
+
+        checkFields += "}"
+
+    return f'''
+func (s {type_name}) MarshalJSON() ([]byte, error) {{
+    type Alias {type_name}
+    alias := Alias(s)
+    base, err := json.Marshal(alias)
+    if err != nil {{
+        return nil, err
+    }}
+
+    driver := ""
+    payload := []byte{{}}
+    {checkFields}
+
+    if err != nil {{
+        return nil, err
+    }}
+
+    if len(base) == len("{{}}") {{
+        base = []byte("")
+    }} else {{
+        base = append([]byte{{','}}, base[1:len(base)-1]...)
+    }}
+
+    if len(payload) == 0 || len(payload) == len("{{}}") {{
+        payload = []byte("")
+    }} else {{
+        payload = append([]byte{{','}}, payload[1:len(payload)-1]...)
+    }}
+
+    result := fmt.Sprintf(`{{"{variants.tag_member.name}":"%s"%s%s}}`, driver, base, payload)
+    return []byte(result), nil
+}}
+
+func (s *{type_name}) UnmarshalJSON(data []byte) error {{
+    type Alias {type_name}
+    peek := struct {{
+        Alias
+        Driver string `json:"{variants.tag_member.name}"`
+    }}{{}}
+
+
+    if err := json.Unmarshal(data, &peek); err != nil {{
+        return err
+    }}
+    *s = {type_name}(peek.Alias)
+
+    switch peek.Driver {{
+    {driverCases}
+    }}
+    // Unrecognizer drivers are silently ignored.
+    return nil
+}}
+'''
+
+
 def qapi_schema_type_to_go_type(type: str) -> str:
     schema_types_to_go = {
             'str': 'string', 'null': 'nil', 'bool': 'bool', 'number':
@@ -345,6 +439,10 @@  def qapi_to_field_name(name: str) -> str:
     return name.title().replace("_", "").replace("-", "")
 
 
+def qapi_name_is_base(name: str) -> bool:
+    return name.startswith("q_obj_") and name.endswith("-base")
+
+
 def qapi_to_go_type_name(name: str, meta: str) -> str:
     if name.startswith("q_obj_"):
         name = name[6:]