diff mbox series

[RESEND,1/3] vfio/pci: fix a null pointer reference in vfio_rom_read

Message ID 20200224064219.1434-2-longpeng2@huawei.com
State New
Headers show
Series fix some warnings by static code scan tool | expand

Commit Message

From: Longpeng <longpeng2@huawei.com>

vfio_pci_load_rom() may fail, leaving vdev->rom NULL in some
situations (though I have not encountered this yet), so we should
avoid the VM abort.

Signed-off-by: Longpeng <longpeng2@huawei.com>
---
 hw/vfio/pci.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

Comments

Alex Williamson Feb. 24, 2020, 4:04 p.m. UTC | #1
On Mon, 24 Feb 2020 14:42:17 +0800
"Longpeng(Mike)" <longpeng2@huawei.com> wrote:

> From: Longpeng <longpeng2@huawei.com>
> 
> vfio_pci_load_rom() maybe failed and then the vdev->rom is NULL in
> some situation (though I've not encountered yet), maybe we should
> avoid the VM abort.
> 
> Signed-off-by: Longpeng <longpeng2@huawei.com>
> ---
>  hw/vfio/pci.c | 13 ++++++++-----
>  1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 5e75a95..ed798ae 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -768,7 +768,7 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
>      }
>  }
>  
> -static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> +static bool vfio_pci_load_rom(VFIOPCIDevice *vdev)
>  {
>      struct vfio_region_info *reg_info;
>      uint64_t size;
> @@ -778,7 +778,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>      if (vfio_get_region_info(&vdev->vbasedev,
>                               VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
>          error_report("vfio: Error getting ROM info: %m");
> -        return;
> +        return false;
>      }
>  
>      trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
> @@ -797,7 +797,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>          error_printf("Device option ROM contents are probably invalid "
>                      "(check dmesg).\nSkip option ROM probe with rombar=0, "
>                      "or load from file with romfile=\n");
> -        return;
> +        return false;
>      }
>  
>      vdev->rom = g_malloc(size);
> @@ -849,6 +849,8 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>              data[6] = -csum;
>          }
>      }
> +
> +    return true;
>  }
>  
>  static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
> @@ -863,8 +865,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
>      uint64_t data = 0;
>  
>      /* Load the ROM lazily when the guest tries to read it */
> -    if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
> -        vfio_pci_load_rom(vdev);
> +    if (unlikely(!vdev->rom && !vdev->rom_read_failed) &&
> +        !vfio_pci_load_rom(vdev)) {
> +        return 0;
>      }
>  
>      memcpy(&val, vdev->rom + addr,

Looks like an obvious bug, until you look at the rest of this memcpy():

memcpy(&val, vdev->rom + addr,
           (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);

IOW, we'll do a zero sized memcpy() if rom_size is zero, so there's no
risk of the concern identified in the commit log.  This patch is
unnecessary.  Thanks,

Alex
On 2020/2/25 0:04, Alex Williamson wrote:
> On Mon, 24 Feb 2020 14:42:17 +0800
> "Longpeng(Mike)" <longpeng2@huawei.com> wrote:
> 
>> From: Longpeng <longpeng2@huawei.com>
>>
>> vfio_pci_load_rom() maybe failed and then the vdev->rom is NULL in
>> some situation (though I've not encountered yet), maybe we should
>> avoid the VM abort.
>>
>> Signed-off-by: Longpeng <longpeng2@huawei.com>
>> ---
>>  hw/vfio/pci.c | 13 ++++++++-----
>>  1 file changed, 8 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 5e75a95..ed798ae 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -768,7 +768,7 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
>>      }
>>  }
>>  
>> -static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>> +static bool vfio_pci_load_rom(VFIOPCIDevice *vdev)
>>  {
>>      struct vfio_region_info *reg_info;
>>      uint64_t size;
>> @@ -778,7 +778,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>>      if (vfio_get_region_info(&vdev->vbasedev,
>>                               VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
>>          error_report("vfio: Error getting ROM info: %m");
>> -        return;
>> +        return false;
>>      }
>>  
>>      trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
>> @@ -797,7 +797,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>>          error_printf("Device option ROM contents are probably invalid "
>>                      "(check dmesg).\nSkip option ROM probe with rombar=0, "
>>                      "or load from file with romfile=\n");
>> -        return;
>> +        return false;
>>      }
>>  
>>      vdev->rom = g_malloc(size);
>> @@ -849,6 +849,8 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>>              data[6] = -csum;
>>          }
>>      }
>> +
>> +    return true;
>>  }
>>  
>>  static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
>> @@ -863,8 +865,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
>>      uint64_t data = 0;
>>  
>>      /* Load the ROM lazily when the guest tries to read it */
>> -    if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
>> -        vfio_pci_load_rom(vdev);
>> +    if (unlikely(!vdev->rom && !vdev->rom_read_failed) &&
>> +        !vfio_pci_load_rom(vdev)) {
>> +        return 0;
>>      }
>>  
>>      memcpy(&val, vdev->rom + addr,
> 
> Looks like an obvious bug, until you look at the rest of this memcpy():
> 
> memcpy(&val, vdev->rom + addr,
>            (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
> 
> IOW, we'll do a zero sized memcpy() if rom_size is zero, so there's no
> risk of the concern identified in the commit log.  This patch is
> unnecessary.  Thanks,
> 
Oh, I missed that. Sorry for the noise, thanks.

> Alex
> 
> .
>
Alex Williamson March 10, 2020, 4:11 p.m. UTC | #3
On Tue, 25 Feb 2020 07:48:33 +0800
"Longpeng (Mike, Cloud Infrastructure Service Product Dept.)"
<longpeng2@huawei.com> wrote:

> On 2020/2/25 0:04, Alex Williamson wrote:
> > On Mon, 24 Feb 2020 14:42:17 +0800
> > "Longpeng(Mike)" <longpeng2@huawei.com> wrote:
> >   
> >> From: Longpeng <longpeng2@huawei.com>
> >>
> >> vfio_pci_load_rom() maybe failed and then the vdev->rom is NULL in
> >> some situation (though I've not encountered yet), maybe we should
> >> avoid the VM abort.
> >>
> >> Signed-off-by: Longpeng <longpeng2@huawei.com>
> >> ---
> >>  hw/vfio/pci.c | 13 ++++++++-----
> >>  1 file changed, 8 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index 5e75a95..ed798ae 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -768,7 +768,7 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
> >>      }
> >>  }
> >>  
> >> -static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >> +static bool vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >>  {
> >>      struct vfio_region_info *reg_info;
> >>      uint64_t size;
> >> @@ -778,7 +778,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >>      if (vfio_get_region_info(&vdev->vbasedev,
> >>                               VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
> >>          error_report("vfio: Error getting ROM info: %m");
> >> -        return;
> >> +        return false;
> >>      }
> >>  
> >>      trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
> >> @@ -797,7 +797,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >>          error_printf("Device option ROM contents are probably invalid "
> >>                      "(check dmesg).\nSkip option ROM probe with rombar=0, "
> >>                      "or load from file with romfile=\n");
> >> -        return;
> >> +        return false;
> >>      }
> >>  
> >>      vdev->rom = g_malloc(size);
> >> @@ -849,6 +849,8 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >>              data[6] = -csum;
> >>          }
> >>      }
> >> +
> >> +    return true;
> >>  }
> >>  
> >>  static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
> >> @@ -863,8 +865,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
> >>      uint64_t data = 0;
> >>  
> >>      /* Load the ROM lazily when the guest tries to read it */
> >> -    if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
> >> -        vfio_pci_load_rom(vdev);
> >> +    if (unlikely(!vdev->rom && !vdev->rom_read_failed) &&
> >> +        !vfio_pci_load_rom(vdev)) {
> >> +        return 0;
> >>      }
> >>  
> >>      memcpy(&val, vdev->rom + addr,  
> > 
> > Looks like an obvious bug, until you look at the rest of this memcpy():
> > 
> > memcpy(&val, vdev->rom + addr,
> >            (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
> > 
> > IOW, we'll do a zero sized memcpy() if rom_size is zero, so there's no
> > risk of the concern identified in the commit log.  This patch is
> > unnecessary.  Thanks,
> >   
> Oh, I missed that, sorry for make the noise, thanks

Actually, not noise.  After some internal discussion thanks to Laszlo,
it seems that while memcpy() with a zero size seems to do the right
thing, the behavior for any case where we pass a null pointer is not
actually defined.  However, there's still a bug in the implementation
of the fix above: if vdev->rom_read_failed is set, we'll still fall
through to the memcpy.  I think there's also another bug in the current
implementation that we initialize data to zero but we'll overwrite it
with the uninitialized 'val' in the switch statement.  I think the
below resolves both.  I'll formally post it after a bit of testing:

commit 2088fc1e1f426b98e9ca4d7bcdbe53d886a18c37
Author: Alex Williamson <alex.williamson@redhat.com>
Date:   Tue Mar 10 10:04:36 2020 -0600

    vfio/pci: Use defined memcpy() behavior
    
    vfio_rom_read() relies on memcpy() doing the logically correct thing,
    ie. safely copying zero bytes from a NULL pointer when rom_size is
    zero, rather than the spec definition, which is undefined when the
    source or target pointers are NULL.  Resolve this by wrapping the
    call in the condition expressed previously by the ternary.
    
    Additionally, we still use @val to fill data based on the provided
    @size regardless of memcpy(), so we should initialize @val rather
    than @data.
    
    Reported-by: Longpeng <longpeng2@huawei.com>
    Reported-by: Laszlo Ersek <lersek@redhat.com>
    Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 5e75a95129ac..b0799cdc28ad 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -859,16 +859,17 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
         uint16_t word;
         uint32_t dword;
         uint64_t qword;
-    } val;
-    uint64_t data = 0;
+    } val = { 0 };
+    uint64_t data;
 
     /* Load the ROM lazily when the guest tries to read it */
     if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
         vfio_pci_load_rom(vdev);
     }
 
-    memcpy(&val, vdev->rom + addr,
-           (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
+    if (addr < vdev->rom_size) {
+        memcpy(&val, vdev->rom + addr, MIN(size, vdev->rom_size - addr));
+    }
 
     switch (size) {
     case 1:
Laszlo Ersek March 10, 2020, 11:14 p.m. UTC | #4
On 03/10/20 17:11, Alex Williamson wrote:

> commit 2088fc1e1f426b98e9ca4d7bcdbe53d886a18c37
> Author: Alex Williamson <alex.williamson@redhat.com>
> Date:   Tue Mar 10 10:04:36 2020 -0600
> 
>     vfio/pci: Use defined memcpy() behavior
>     
>     vfio_rom_read() relies on memcpy() doing the logically correct thing,
>     ie. safely copying zero bytes from a NULL pointer when rom_size is
>     zero, rather than the spec definition, which is undefined when the
>     source or target pointers are NULL.  Resolve this by wrapping the
>     call in the condition expressed previously by the ternary.
>     
>     Additionally, we still use @val to fill data based on the provided
>     @size regardless of mempcy(), so we should initialize @val rather
>     than @data.
>     
>     Reported-by: Longpeng <longpeng2@huawei.com>
>     Reported-by: Laszlo Ersek <lersek@redhat.com>
>     Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 5e75a95129ac..b0799cdc28ad 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -859,16 +859,17 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
>          uint16_t word;
>          uint32_t dword;
>          uint64_t qword;
> -    } val;
> -    uint64_t data = 0;
> +    } val = { 0 };
> +    uint64_t data;
>  
>      /* Load the ROM lazily when the guest tries to read it */
>      if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
>          vfio_pci_load_rom(vdev);
>      }
>  
> -    memcpy(&val, vdev->rom + addr,
> -           (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
> +    if (addr < vdev->rom_size) {
> +        memcpy(&val, vdev->rom + addr, MIN(size, vdev->rom_size - addr));
> +    }
>  
>      switch (size) {
>      case 1:

Regarding the pre-patch code:

My understanding is that the memcpy() could be reached with a
guest-originated "addr" even if "vdev->rom" was NULL. If that's the
case, then the pre-patch code invokes undefined behavior regardless of
memcpy(), because it performs pointer arithmetic on a null pointer (not
to mention that the type of that pointer is (void *)....)

Regarding the proposed change:

(addr < vdev->rom_size) requires that "vdev->rom_size" be positive. In
that case, I assume that

- "vdev->rom" is not NULL, and
-  MIN(size, vdev->rom_size - addr) bytes are "in range" for the object
allocated at "vdev->rom".

So from a memcpy() and range perspective, the patch looks OK. But
there's still a wart I dislike: we should never perform pointer
arithmetic on a (void*). I suggest casting (vdev->rom) to (uint8_t*) or
(unsigned char*) first.

Here's an excerpt from the ISO C99 standard:

-v-
6.5.6 Additive operators

Constraints

2 For addition, either both operands shall have arithmetic type, or one
  operand shall be a pointer to an object type and the other shall have
  integer type. [...]
-^-

A "pointer-to-void" is not a "pointer to an object type", because "void"
is not an object type -- it is an incomplete type that cannot be completed:

-v-
6.2.5 Types

1 [...] Types are partitioned into object types (types that fully
  describe objects), function types (types that describe functions), and
  incomplete types (types that describe objects but lack information
  needed to determine their sizes).

[...]

19 The void type comprises an empty set of values; it is an incomplete
   type that cannot be completed.
-^-

For a different illustration, (vdev->rom + addr) is equivalent to
&(vdev->rom[addr]) -- and we clearly can't have an "array of void".

This anti-pattern (of doing pointer arithmetic on (void*)) likely comes
from a guarantee that the standard does make, in the same "6.2.5 Types"
section:

-v-
27 A pointer to void shall have the same representation and alignment
   requirements as a pointer to a character type. 39) [...]

Footnote 39: The same representation and alignment requirements are
             meant to imply interchangeability as arguments to
             functions, return values from functions, and members of
             unions.
-^-

It does not extend to the "+" operator.

Thanks
Laszlo
Alex Williamson March 11, 2020, 1:36 a.m. UTC | #5
On Wed, 11 Mar 2020 00:14:31 +0100
Laszlo Ersek <lersek@redhat.com> wrote:

> On 03/10/20 17:11, Alex Williamson wrote:
> 
> > commit 2088fc1e1f426b98e9ca4d7bcdbe53d886a18c37
> > Author: Alex Williamson <alex.williamson@redhat.com>
> > Date:   Tue Mar 10 10:04:36 2020 -0600
> > 
> >     vfio/pci: Use defined memcpy() behavior
> >     
> >     vfio_rom_read() relies on memcpy() doing the logically correct thing,
> >     ie. safely copying zero bytes from a NULL pointer when rom_size is
> >     zero, rather than the spec definition, which is undefined when the
> >     source or target pointers are NULL.  Resolve this by wrapping the
> >     call in the condition expressed previously by the ternary.
> >     
> >     Additionally, we still use @val to fill data based on the provided
> >     @size regardless of mempcy(), so we should initialize @val rather
> >     than @data.
> >     
> >     Reported-by: Longpeng <longpeng2@huawei.com>
> >     Reported-by: Laszlo Ersek <lersek@redhat.com>
> >     Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > 
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index 5e75a95129ac..b0799cdc28ad 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -859,16 +859,17 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
> >          uint16_t word;
> >          uint32_t dword;
> >          uint64_t qword;
> > -    } val;
> > -    uint64_t data = 0;
> > +    } val = { 0 };
> > +    uint64_t data;
> >  
> >      /* Load the ROM lazily when the guest tries to read it */
> >      if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
> >          vfio_pci_load_rom(vdev);
> >      }
> >  
> > -    memcpy(&val, vdev->rom + addr,
> > -           (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
> > +    if (addr < vdev->rom_size) {
> > +        memcpy(&val, vdev->rom + addr, MIN(size, vdev->rom_size - addr));
> > +    }
> >  
> >      switch (size) {
> >      case 1:  
> 
> Regarding the pre-patch code:
> 
> My understanding is that the memcpy() could be reached with a
> guest-originated "addr" even if "vdev->rom" was NULL. If that's the
> case, then the pre-patch code invokes undefined behavior regardless of
> memcpy(), because it performs pointer arithmetic on a null pointer (not
> to mention that the type of that pointer is (void *)....)
> 
> Regarding the proposed change:
> 
> (addr < vdev->rom_size) requires that "vdev->rom_size" be positive. In
> that case, I assume that
> 
> - "vdev->rom" is not NULL, and
> -  MIN(size, vdev->rom_size - addr) bytes are "in range" for the object
> allocated at "vdev->rom".
> 
> So from a memcpy() and range perspective, the patch looks OK. But
> there's still a wart I dislike: we should never perform pointer
> arithmetic on a (void*). I suggest casting (vdev->rom) to (uint8_t*) or
> (unsigned char*) first.
> 
> Here's an excerpt from the ISO C99 standard:
> 
> -v-
> 6.5.6 Additive operators
> 
> Constraints
> 
> 2 For addition, either both operands shall have arithmetic type, or one
>   operand shall be a pointer to an object type and the other shall have
>   integer type. [...]
> -^-
> 
> A "pointer-to-void" is not a "pointer to an object type", because "void"
> is not an object type -- it is an incomplete type that cannot be completed:
> 
> -v-
> 6.2.5 Types
> 
> 1 [...] Types are partitioned into object types (types that fully
>   describe objects), function types (types that describe functions), and
>   incomplete types (types that describe objects but lack information
>   needed to determine their sizes).
> 
> [...]
> 
> 19 The void type comprises an empty set of values; it is an incomplete
>    type that cannot be completed.
> -^-
> 
> For a different illustration, (vdev->rom + addr) is equivalent to
> &(vdev->rom[addr]) -- and we clearly can't have an "array of void".
> 
> This anti-pattern (of doing pointer arithmetic on (void*)) likely comes
> from a guarantee that the standard does make, in the same "6.2.5 Types"
> section:
> 
> -v-
> 27 A pointer to void shall have the same representation and alignment
>    requirements as a pointer to a character type. 39) [...]
> 
> Footnote 39: The same representation and alignment requirements are
>              meant to imply interchangeability as arguments to
>              functions, return values from functions, and members of
>              unions.
> -^-
> 
> It does not extend to the "+" operator.

GNU C specifically allows arithmetic on void pointers and treats the
size of void as 1.  I'll comply, but this makes me want to stab myself
in the face :-\  Thanks,

Alex
Markus Armbruster March 11, 2020, 7:04 a.m. UTC | #6
Alex Williamson <alex.williamson@redhat.com> writes:

> On Mon, 24 Feb 2020 14:42:17 +0800
> "Longpeng(Mike)" <longpeng2@huawei.com> wrote:
>
>> From: Longpeng <longpeng2@huawei.com>
>> 
>> vfio_pci_load_rom() maybe failed and then the vdev->rom is NULL in
>> some situation (though I've not encountered yet), maybe we should
>> avoid the VM abort.

What "VM abort" exactly?

>> 
>> Signed-off-by: Longpeng <longpeng2@huawei.com>
>> ---
>>  hw/vfio/pci.c | 13 ++++++++-----
>>  1 file changed, 8 insertions(+), 5 deletions(-)
>> 
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 5e75a95..ed798ae 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -768,7 +768,7 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
>>      }
>>  }
>>  
>> -static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>> +static bool vfio_pci_load_rom(VFIOPCIDevice *vdev)
>>  {
>>      struct vfio_region_info *reg_info;
>>      uint64_t size;
>> @@ -778,7 +778,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>>      if (vfio_get_region_info(&vdev->vbasedev,
>>                               VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
>>          error_report("vfio: Error getting ROM info: %m");
>> -        return;
>> +        return false;
>>      }
>>  
>>      trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
>> @@ -797,7 +797,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>>          error_printf("Device option ROM contents are probably invalid "
>>                      "(check dmesg).\nSkip option ROM probe with rombar=0, "
>>                      "or load from file with romfile=\n");
>> -        return;
>> +        return false;
>>      }
>>  
>>      vdev->rom = g_malloc(size);
>> @@ -849,6 +849,8 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>>              data[6] = -csum;
>>          }
>>      }
>> +
>> +    return true;
>>  }
>>  
>>  static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
>> @@ -863,8 +865,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
    {
        VFIOPCIDevice *vdev = opaque;
        union {
            uint8_t byte;
            uint16_t word;
            uint32_t dword;
            uint64_t qword;
        } val;
>>      uint64_t data = 0;
>>  
>>      /* Load the ROM lazily when the guest tries to read it */
>> -    if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
>> -        vfio_pci_load_rom(vdev);
>> +    if (unlikely(!vdev->rom && !vdev->rom_read_failed) &&
>> +        !vfio_pci_load_rom(vdev)) {
>> +        return 0;
>>      }
>>  
>>      memcpy(&val, vdev->rom + addr,
>
> Looks like an obvious bug, until you look at the rest of this memcpy():
>
> memcpy(&val, vdev->rom + addr,
>            (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
>
> IOW, we'll do a zero sized memcpy() if rom_size is zero, so there's no
> risk of the concern identified in the commit log.  This patch is
> unnecessary.  Thanks,

I'm blind: why does !vdev->rom imply !vdev->rom_size?

Moreover, when MIN(size, vdev->rom_size - addr) < size, we seem to read
uninitialized data from @val:

        switch (size) {
        case 1:
            data = val.byte;
            break;
        case 2:
            data = le16_to_cpu(val.word);
            break;
        case 4:
            data = le32_to_cpu(val.dword);
            break;
        default:
            hw_error("vfio: unsupported read size, %d bytes\n", size);
            break;
        }

        trace_vfio_rom_read(vdev->vbasedev.name, addr, size, data);

        return data;
    }

Why is that okay?

Why do we initialize @data?

How can we get to the default case?  If we can get there, is hw_error()
really the right thing to do?  It almost never is...  If getting there
is the guest's fault, we need to tell it off the same way physical
hardware does.  If we should not ever get there (i.e. it's a QEMU bug),
then a plain abort() would be clearer.
Markus Armbruster March 11, 2020, 7:08 a.m. UTC | #7
Alex Williamson <alex.williamson@redhat.com> writes:

> On Wed, 11 Mar 2020 00:14:31 +0100
> Laszlo Ersek <lersek@redhat.com> wrote:
[...]
>> So from a memcpy() and range perspective, the patch looks OK. But
>> there's still a wart I dislike: we should never perform pointer
>> arithmetic on a (void*). I suggest casting (vdev->rom) to (uint8_t*) or
>> (unsigned char*) first.
>> 
>> Here's an excerpt from the ISO C99 standard:
>> 
>> -v-
>> 6.5.6 Additive operators
>> 
>> Constraints
>> 
>> 2 For addition, either both operands shall have arithmetic type, or one
>>   operand shall be a pointer to an object type and the other shall have
>>   integer type. [...]
>> -^-
>> 
>> A "pointer-to-void" is not a "pointer to an object type", because "void"
>> is not an object type -- it is an incomplete type that cannot be completed:
>> 
>> -v-
>> 6.2.5 Types
>> 
>> 1 [...] Types are partitioned into object types (types that fully
>>   describe objects), function types (types that describe functions), and
>>   incomplete types (types that describe objects but lack information
>>   needed to determine their sizes).
>> 
>> [...]
>> 
>> 19 The void type comprises an empty set of values; it is an incomplete
>>    type that cannot be completed.
>> -^-
>> 
>> For a different illustration, (vdev->rom + addr) is equivalent to
>> &(vdev->rom[addr]) -- and we clearly can't have an "array of void".
>> 
>> This anti-pattern (of doing pointer arithmetic on (void*)) likely comes
>> from a guarantee that the standard does make, in the same "6.2.5 Types"
>> section:
>> 
>> -v-
>> 27 A pointer to void shall have the same representation and alignment
>>    requirements as a pointer to a character type. 39) [...]
>> 
>> Footnote 39: The same representation and alignment requirements are
>>              meant to imply interchangeability as arguments to
>>              functions, return values from functions, and members of
>>              unions.
>> -^-
>> 
>> It does not extend to the "+" operator.
>
> GNU C specifically allows arithmetic on pointers and defines the size
> of a void as 1.  I'll comply, but this makes me want to stab myself in
> the face :-\  Thanks,

We rely on GNU C extensions all over the place.  Making the code uglier
to avoid relying on this one here makes no sense to me.
Laszlo Ersek March 11, 2020, 10:26 a.m. UTC | #8
On 03/11/20 02:36, Alex Williamson wrote:
> On Wed, 11 Mar 2020 00:14:31 +0100
> Laszlo Ersek <lersek@redhat.com> wrote:
> 
>> On 03/10/20 17:11, Alex Williamson wrote:
>>
>>> commit 2088fc1e1f426b98e9ca4d7bcdbe53d886a18c37
>>> Author: Alex Williamson <alex.williamson@redhat.com>
>>> Date:   Tue Mar 10 10:04:36 2020 -0600
>>>
>>>     vfio/pci: Use defined memcpy() behavior
>>>     
>>>     vfio_rom_read() relies on memcpy() doing the logically correct thing,
>>>     ie. safely copying zero bytes from a NULL pointer when rom_size is
>>>     zero, rather than the spec definition, which is undefined when the
>>>     source or target pointers are NULL.  Resolve this by wrapping the
>>>     call in the condition expressed previously by the ternary.
>>>     
>>>     Additionally, we still use @val to fill data based on the provided
>>>     @size regardless of mempcy(), so we should initialize @val rather
>>>     than @data.
>>>     
>>>     Reported-by: Longpeng <longpeng2@huawei.com>
>>>     Reported-by: Laszlo Ersek <lersek@redhat.com>
>>>     Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
>>>
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index 5e75a95129ac..b0799cdc28ad 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -859,16 +859,17 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
>>>          uint16_t word;
>>>          uint32_t dword;
>>>          uint64_t qword;
>>> -    } val;
>>> -    uint64_t data = 0;
>>> +    } val = { 0 };
>>> +    uint64_t data;
>>>  
>>>      /* Load the ROM lazily when the guest tries to read it */
>>>      if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
>>>          vfio_pci_load_rom(vdev);
>>>      }
>>>  
>>> -    memcpy(&val, vdev->rom + addr,
>>> -           (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
>>> +    if (addr < vdev->rom_size) {
>>> +        memcpy(&val, vdev->rom + addr, MIN(size, vdev->rom_size - addr));
>>> +    }
>>>  
>>>      switch (size) {
>>>      case 1:  
>>
>> Regarding the pre-patch code:
>>
>> My understanding is that the memcpy() could be reached with a
>> guest-originated "addr" even if "vdev->rom" was NULL. If that's the
>> case, then the pre-patch code invokes undefined behavior regardless of
>> memcpy(), because it performs pointer arithmetic on a null pointer (not
>> to mention that the type of that pointer is (void *)....)
>>
>> Regarding the proposed change:
>>
>> (addr < vdev->rom_size) requires that "vdev->rom_size" be positive. In
>> that case, I assume that
>>
>> - "vdev->rom" is not NULL, and
>> -  MIN(size, vdev->rom_size - addr) bytes are "in range" for the object
>> allocated at "vdev->rom".
>>
>> So from a memcpy() and range perspective, the patch looks OK. But
>> there's still a wart I dislike: we should never perform pointer
>> arithmetic on a (void*). I suggest casting (vdev->rom) to (uint8_t*) or
>> (unsigned char*) first.
>>
>> Here's an excerpt from the ISO C99 standard:
>>
>> -v-
>> 6.5.6 Additive operators
>>
>> Constraints
>>
>> 2 For addition, either both operands shall have arithmetic type, or one
>>   operand shall be a pointer to an object type and the other shall have
>>   integer type. [...]
>> -^-
>>
>> A "pointer-to-void" is not a "pointer to an object type", because "void"
>> is not an object type -- it is an incomplete type that cannot be completed:
>>
>> -v-
>> 6.2.5 Types
>>
>> 1 [...] Types are partitioned into object types (types that fully
>>   describe objects), function types (types that describe functions), and
>>   incomplete types (types that describe objects but lack information
>>   needed to determine their sizes).
>>
>> [...]
>>
>> 19 The void type comprises an empty set of values; it is an incomplete
>>    type that cannot be completed.
>> -^-
>>
>> For a different illustration, (vdev->rom + addr) is equivalent to
>> &(vdev->rom[addr]) -- and we clearly can't have an "array of void".
>>
>> This anti-pattern (of doing pointer arithmetic on (void*)) likely comes
>> from a guarantee that the standard does make, in the same "6.2.5 Types"
>> section:
>>
>> -v-
>> 27 A pointer to void shall have the same representation and alignment
>>    requirements as a pointer to a character type. 39) [...]
>>
>> Footnote 39: The same representation and alignment requirements are
>>              meant to imply interchangeability as arguments to
>>              functions, return values from functions, and members of
>>              unions.
>> -^-
>>
>> It does not extend to the "+" operator.
> 
> GNU C specifically allows arithmetic on pointers and defines the size
> of a void as 1.  I'll comply, but this makes me want to stab myself in
> the face :-\  Thanks,

Sorry, I didn't want to annoy you. :)

In fact I was about to mention, "I really don't understand why compilers
don't yell upon seeing pointer-to-void arithmetic", but I got distracted
and forgot about that thought. In retrospect, that may have been for the
best! :)

Thanks
Laszlo
Laszlo Ersek March 11, 2020, 10:28 a.m. UTC | #9
On 03/11/20 08:08, Markus Armbruster wrote:
> Alex Williamson <alex.williamson@redhat.com> writes:
> 
>> On Wed, 11 Mar 2020 00:14:31 +0100
>> Laszlo Ersek <lersek@redhat.com> wrote:
> [...]
>>> So from a memcpy() and range perspective, the patch looks OK. But
>>> there's still a wart I dislike: we should never perform pointer
>>> arithmetic on a (void*). I suggest casting (vdev->rom) to (uint8_t*) or
>>> (unsigned char*) first.
>>>
>>> Here's an excerpt from the ISO C99 standard:
>>>
>>> -v-
>>> 6.5.6 Additive operators
>>>
>>> Constraints
>>>
>>> 2 For addition, either both operands shall have arithmetic type, or one
>>>   operand shall be a pointer to an object type and the other shall have
>>>   integer type. [...]
>>> -^-
>>>
>>> A "pointer-to-void" is not a "pointer to an object type", because "void"
>>> is not an object type -- it is an incomplete type that cannot be completed:
>>>
>>> -v-
>>> 6.2.5 Types
>>>
>>> 1 [...] Types are partitioned into object types (types that fully
>>>   describe objects), function types (types that describe functions), and
>>>   incomplete types (types that describe objects but lack information
>>>   needed to determine their sizes).
>>>
>>> [...]
>>>
>>> 19 The void type comprises an empty set of values; it is an incomplete
>>>    type that cannot be completed.
>>> -^-
>>>
>>> For a different illustration, (vdev->rom + addr) is equivalent to
>>> &(vdev->rom[addr]) -- and we clearly can't have an "array of void".
>>>
>>> This anti-pattern (of doing pointer arithmetic on (void*)) likely comes
>>> from a guarantee that the standard does make, in the same "6.2.5 Types"
>>> section:
>>>
>>> -v-
>>> 27 A pointer to void shall have the same representation and alignment
>>>    requirements as a pointer to a character type. 39) [...]
>>>
>>> Footnote 39: The same representation and alignment requirements are
>>>              meant to imply interchangeability as arguments to
>>>              functions, return values from functions, and members of
>>>              unions.
>>> -^-
>>>
>>> It does not extend to the "+" operator.
>>
>> GNU C specifically allows arithmetic on pointers and defines the size
>> of a void as 1.  I'll comply, but this makes me want to stab myself in
>> the face :-\  Thanks,
> 
> We rely on GNU C extensions all over the place.  Making the code uglier
> to avoid relying on this one here makes no sense to me.
> 

I agree, in fact. If GNU-isms are liberally used & tolerated in the QEMU
source, then there's no reason to diverge from that here. I steer clear
of GNU-isms as much as I can, regardless of codebase, but I *did* forget
that QEMU permits GNU-isms -- so there's no need for my pedantry here.

Reviewed-by: Laszlo Ersek <lersek@redhat.com>

Thanks!
Laszlo
Markus Armbruster March 11, 2020, 11:54 a.m. UTC | #10
Laszlo Ersek <lersek@redhat.com> writes:

> On 03/11/20 02:36, Alex Williamson wrote:
>> On Wed, 11 Mar 2020 00:14:31 +0100
>> Laszlo Ersek <lersek@redhat.com> wrote:
>> 
>>> On 03/10/20 17:11, Alex Williamson wrote:
>>>
>>>> commit 2088fc1e1f426b98e9ca4d7bcdbe53d886a18c37
>>>> Author: Alex Williamson <alex.williamson@redhat.com>
>>>> Date:   Tue Mar 10 10:04:36 2020 -0600
>>>>
>>>>     vfio/pci: Use defined memcpy() behavior
>>>>     
>>>>     vfio_rom_read() relies on memcpy() doing the logically correct thing,
>>>>     ie. safely copying zero bytes from a NULL pointer when rom_size is
>>>>     zero, rather than the spec definition, which is undefined when the
>>>>     source or target pointers are NULL.  Resolve this by wrapping the
>>>>     call in the condition expressed previously by the ternary.
>>>>     
>>>>     Additionally, we still use @val to fill data based on the provided
>>>>     @size regardless of memcpy(), so we should initialize @val rather
>>>>     than @data.
>>>>     
>>>>     Reported-by: Longpeng <longpeng2@huawei.com>
>>>>     Reported-by: Laszlo Ersek <lersek@redhat.com>
>>>>     Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
>>>>
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index 5e75a95129ac..b0799cdc28ad 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -859,16 +859,17 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
>>>>          uint16_t word;
>>>>          uint32_t dword;
>>>>          uint64_t qword;
>>>> -    } val;
>>>> -    uint64_t data = 0;
>>>> +    } val = { 0 };
>>>> +    uint64_t data;
>>>>  
>>>>      /* Load the ROM lazily when the guest tries to read it */
>>>>      if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
>>>>          vfio_pci_load_rom(vdev);
>>>>      }
>>>>  
>>>> -    memcpy(&val, vdev->rom + addr,
>>>> -           (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
>>>> +    if (addr < vdev->rom_size) {
>>>> +        memcpy(&val, vdev->rom + addr, MIN(size, vdev->rom_size - addr));
>>>> +    }
>>>>  
>>>>      switch (size) {
>>>>      case 1:  
>>>
>>> Regarding the pre-patch code:
>>>
>>> My understanding is that the memcpy() could be reached with a
>>> guest-originated "addr" even if "vdev->rom" was NULL. If that's the
>>> case, then the pre-patch code invokes undefined behavior regardless of
>>> memcpy(), because it performs pointer arithmetic on a null pointer (not
>>> to mention that the type of that pointer is (void *)....)
>>>
>>> Regarding the proposed change:
>>>
>>> (addr < vdev->rom_size) requires that "vdev->rom_size" be positive. In
>>> that case, I assume that
>>>
>>> - "vdev->rom" is not NULL, and
>>> -  MIN(size, vdev->rom_size - addr) bytes are "in range" for the object
>>> allocated at "vdev->rom".
>>>
>>> So from a memcpy() and range perspective, the patch looks OK. But
>>> there's still a wart I dislike: we should never perform pointer
>>> arithmetic on a (void*). I suggest casting (vdev->rom) to (uint8_t*) or
>>> (unsigned char*) first.
>>>
>>> Here's an excerpt from the ISO C99 standard:
>>>
>>> -v-
>>> 6.5.6 Additive operators
>>>
>>> Constraints
>>>
>>> 2 For addition, either both operands shall have arithmetic type, or one
>>>   operand shall be a pointer to an object type and the other shall have
>>>   integer type. [...]
>>> -^-
>>>
>>> A "pointer-to-void" is not a "pointer to an object type", because "void"
>>> is not an object type -- it is an incomplete type that cannot be completed:
>>>
>>> -v-
>>> 6.2.5 Types
>>>
>>> 1 [...] Types are partitioned into object types (types that fully
>>>   describe objects), function types (types that describe functions), and
>>>   incomplete types (types that describe objects but lack information
>>>   needed to determine their sizes).
>>>
>>> [...]
>>>
>>> 19 The void type comprises an empty set of values; it is an incomplete
>>>    type that cannot be completed.
>>> -^-
>>>
>>> For a different illustration, (vdev->rom + addr) is equivalent to
>>> &(vdev->rom[addr]) -- and we clearly can't have an "array of void".
>>>
>>> This anti-pattern (of doing pointer arithmetic on (void*)) likely comes
>>> from a guarantee that the standard does make, in the same "6.2.5 Types"
>>> section:
>>>
>>> -v-
>>> 27 A pointer to void shall have the same representation and alignment
>>>    requirements as a pointer to a character type. 39) [...]
>>>
>>> Footnote 39: The same representation and alignment requirements are
>>>              meant to imply interchangeability as arguments to
>>>              functions, return values from functions, and members of
>>>              unions.
>>> -^-
>>>
>>> It does not extend to the "+" operator.
>> 
>> GNU C specifically allows arithmetic on pointers and defines the size
>> of a void as 1.  I'll comply, but this makes me want to stab myself in
>> the face :-\  Thanks,
>
> Sorry, I didn't want to annoy you. :)
>
> In fact I was about to mention, "I really don't understand why compilers
> don't yell upon seeing pointer-to-void arithmetic", but I got distracted
> and forgot about that thought. In retrospect, that may have been for the
> best! :)

You're looking for

'-Wpointer-arith'
     Warn about anything that depends on the "size of" a function type
     or of 'void'.  GNU C assigns these types a size of 1, for
     convenience in calculations with 'void *' pointers and pointers to
     functions.  In C++, warn also when an arithmetic operation involves
     'NULL'.  This warning is also enabled by '-Wpedantic'.
Laszlo Ersek March 11, 2020, 1 p.m. UTC | #11
On 03/11/20 12:54, Markus Armbruster wrote:
> Laszlo Ersek <lersek@redhat.com> writes:

>> In fact I was about to mention, "I really don't understand why compilers
>> don't yell upon seeing pointer-to-void arithmetic", but I got distracted
>> and forgot about that thought. In retrospect, that may have been for the
>> best! :)
> 
> You're looking for
> 
> '-Wpointer-arith'
>      Warn about anything that depends on the "size of" a function type
>      or of 'void'.  GNU C assigns these types a size of 1, for
>      convenience in calculations with 'void *' pointers and pointers to
>      functions.  In C++, warn also when an arithmetic operation involves
>      'NULL'.  This warning is also enabled by '-Wpedantic'.
> 

Thanks! It seems like "-Wpedantic" and "-pedantic" are synonymous. And
the latter used to be part of my standard set of flags, while I worked
on hosted C programs where I could influence the build flags ;)

Cheers,
Laszlo
Alex Williamson March 11, 2020, 3:39 p.m. UTC | #12
On Wed, 11 Mar 2020 08:04:28 +0100
Markus Armbruster <armbru@redhat.com> wrote:

> Alex Williamson <alex.williamson@redhat.com> writes:
> 
> > On Mon, 24 Feb 2020 14:42:17 +0800
> > "Longpeng(Mike)" <longpeng2@huawei.com> wrote:
> >  
> >> From: Longpeng <longpeng2@huawei.com>
> >> 
> >> vfio_pci_load_rom() maybe failed and then the vdev->rom is NULL in
> >> some situation (though I've not encountered yet), maybe we should
> >> avoid the VM abort.  
> 
> What "VM abort" exactly?

There is none because memcpy() does something sane when size is zero,
but to be ISO whatever spec compliant we shouldn't rely on that.

> >> 
> >> Signed-off-by: Longpeng <longpeng2@huawei.com>
> >> ---
> >>  hw/vfio/pci.c | 13 ++++++++-----
> >>  1 file changed, 8 insertions(+), 5 deletions(-)
> >> 
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index 5e75a95..ed798ae 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -768,7 +768,7 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
> >>      }
> >>  }
> >>  
> >> -static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >> +static bool vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >>  {
> >>      struct vfio_region_info *reg_info;
> >>      uint64_t size;
> >> @@ -778,7 +778,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >>      if (vfio_get_region_info(&vdev->vbasedev,
> >>                               VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
> >>          error_report("vfio: Error getting ROM info: %m");
> >> -        return;
> >> +        return false;
> >>      }
> >>  
> >>      trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
> >> @@ -797,7 +797,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >>          error_printf("Device option ROM contents are probably invalid "
> >>                      "(check dmesg).\nSkip option ROM probe with rombar=0, "
> >>                      "or load from file with romfile=\n");
> >> -        return;
> >> +        return false;
> >>      }
> >>  
> >>      vdev->rom = g_malloc(size);
> >> @@ -849,6 +849,8 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >>              data[6] = -csum;
> >>          }
> >>      }
> >> +
> >> +    return true;
> >>  }
> >>  
> >>  static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
> >> @@ -863,8 +865,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)  
>     {
>         VFIOPCIDevice *vdev = opaque;
>         union {
>             uint8_t byte;
>             uint16_t word;
>             uint32_t dword;
>             uint64_t qword;
>         } val;
> >>      uint64_t data = 0;
> >>  
> >>      /* Load the ROM lazily when the guest tries to read it */
> >> -    if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
> >> -        vfio_pci_load_rom(vdev);
> >> +    if (unlikely(!vdev->rom && !vdev->rom_read_failed) &&
> >> +        !vfio_pci_load_rom(vdev)) {
> >> +        return 0;
> >>      }
> >>  
> >>      memcpy(&val, vdev->rom + addr,  
> >
> > Looks like an obvious bug, until you look at the rest of this memcpy():
> >
> > memcpy(&val, vdev->rom + addr,
> >            (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
> >
> > IOW, we'll do a zero sized memcpy() if rom_size is zero, so there's no
> > risk of the concern identified in the commit log.  This patch is
> > unnecessary.  Thanks,  
> 
> I'm blind: why does !vdev->rom imply !vdev->rom_size?

See vfio_pci_load_rom(), rom_size and rom are set and allocated
together.

> Moreover, when MIN(size, vdev->rom_size - addr) < size, we seem to read
> uninitialized data from @val:

This is fixed in my patch
https://lists.gnu.org/archive/html/qemu-devel/2020-03/msg02778.html

> 
>         switch (size) {
>         case 1:
>             data = val.byte;
>             break;
>         case 2:
>             data = le16_to_cpu(val.word);
>             break;
>         case 4:
>             data = le32_to_cpu(val.dword);
>             break;
>         default:
>             hw_error("vfio: unsupported read size, %d bytes\n", size);
>             break;
>         }
> 
>         trace_vfio_rom_read(vdev->vbasedev.name, addr, size, data);
> 
>         return data;
>     }
> 
> Why is that okay?
> 
> Why do we initialize @data?

Bug.  The switch was only added later (75bd0c7253f3) and we failed to
catch it.  Prior to that we were initializing val and the memcpy() only
overwrote it as necessary.  In any case, getting back garbage for the
rom when there isn't one generally works ok since the chances of
generating a proper rom signature are infinitesimal.  Clearly not what
was intended though.

> How can we get to the default case?  If we can get there, is hw_error()
> really the right thing to do?  It almost never is...  If getting there
> is the guest's fault, we need to tell it off the same way physical
> hardware does.  If we should not ever get there (i.e. it's a QEMU bug),
> then a plain abort() would be clearer.

AFAIK this is relatively standard, if not somewhat paranoid, handling
for a MemoryRegion ops callback.  The MemoryRegionOps code only allows
certain size accesses, so it would effectively be an internal error to
hit the default case, which seems to be not an uncommon use case of
hw_error.  Thanks,

Alex
Alex Williamson March 11, 2020, 3:56 p.m. UTC | #13
On Wed, 11 Mar 2020 11:28:26 +0100
Laszlo Ersek <lersek@redhat.com> wrote:

> On 03/11/20 08:08, Markus Armbruster wrote:
> > Alex Williamson <alex.williamson@redhat.com> writes:
> >   
> >> On Wed, 11 Mar 2020 00:14:31 +0100
> >> Laszlo Ersek <lersek@redhat.com> wrote:  
> > [...]  
> >>> So from a memcpy() and range perspective, the patch looks OK. But
> >>> there's still a wart I dislike: we should never perform pointer
> >>> arithmetic on a (void*). I suggest casting (vdev->rom) to (uint8_t*) or
> >>> (unsigned char*) first.
> >>>
> >>> Here's an excerpt from the ISO C99 standard:
> >>>
> >>> -v-
> >>> 6.5.6 Additive operators
> >>>
> >>> Constraints
> >>>
> >>> 2 For addition, either both operands shall have arithmetic type, or one
> >>>   operand shall be a pointer to an object type and the other shall have
> >>>   integer type. [...]
> >>> -^-
> >>>
> >>> A "pointer-to-void" is not a "pointer to an object type", because "void"
> >>> is not an object type -- it is an incomplete type that cannot be completed:
> >>>
> >>> -v-
> >>> 6.2.5 Types
> >>>
> >>> 1 [...] Types are partitioned into object types (types that fully
> >>>   describe objects), function types (types that describe functions), and
> >>>   incomplete types (types that describe objects but lack information
> >>>   needed to determine their sizes).
> >>>
> >>> [...]
> >>>
> >>> 19 The void type comprises an empty set of values; it is an incomplete
> >>>    type that cannot be completed.
> >>> -^-
> >>>
> >>> For a different illustration, (vdev->rom + addr) is equivalent to
> >>> &(vdev->rom[addr]) -- and we clearly can't have an "array of void".
> >>>
> >>> This anti-pattern (of doing pointer arithmetic on (void*)) likely comes
> >>> from a guarantee that the standard does make, in the same "6.2.5 Types"
> >>> section:
> >>>
> >>> -v-
> >>> 27 A pointer to void shall have the same representation and alignment
> >>>    requirements as a pointer to a character type. 39) [...]
> >>>
> >>> Footnote 39: The same representation and alignment requirements are
> >>>              meant to imply interchangeability as arguments to
> >>>              functions, return values from functions, and members of
> >>>              unions.
> >>> -^-
> >>>
> >>> It does not extend to the "+" operator.  
> >>
> >> GNU C specifically allows arithmetic on pointers and defines the size
> >> of a void as 1.  I'll comply, but this makes me want to stab myself in
> >> the face :-\  Thanks,  
> > 
> > We rely on GNU C extensions all over the place.  Making the code uglier
> > to avoid relying on this one here makes no sense to me.
> >   
> 
> I agree, in fact. If GNU-isms are liberally used & tolerated in the QEMU
> source, then there's no reason to diverge from that here. I steer clear
> of GNU-isms as much as I can, regardless of codebase, but I *did* forget
> that QEMU permits GNU-isms -- so there's no need for my pedantry here.
> 
> Reviewed-by: Laszlo Ersek <lersek@redhat.com>

Oh, thank goodness ;)  Thanks,

Alex
Markus Armbruster March 12, 2020, 5:50 a.m. UTC | #14
Alex Williamson <alex.williamson@redhat.com> writes:

> On Wed, 11 Mar 2020 08:04:28 +0100
> Markus Armbruster <armbru@redhat.com> wrote:
>
>> Alex Williamson <alex.williamson@redhat.com> writes:
>> 
>> > On Mon, 24 Feb 2020 14:42:17 +0800
>> > "Longpeng(Mike)" <longpeng2@huawei.com> wrote:
>> >  
>> >> From: Longpeng <longpeng2@huawei.com>
>> >> 
>> >> vfio_pci_load_rom() maybe failed and then the vdev->rom is NULL in
>> >> some situation (though I've not encountered yet), maybe we should
>> >> avoid the VM abort.  
>> 
>> What "VM abort" exactly?
>
> There is none because memcpy() does something sane when size is zero,
> but to be ISO whatever spec compliant we shouldn't rely on that.
>
>> >> 
>> >> Signed-off-by: Longpeng <longpeng2@huawei.com>
>> >> ---
>> >>  hw/vfio/pci.c | 13 ++++++++-----
>> >>  1 file changed, 8 insertions(+), 5 deletions(-)
>> >> 
>> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> >> index 5e75a95..ed798ae 100644
>> >> --- a/hw/vfio/pci.c
>> >> +++ b/hw/vfio/pci.c
>> >> @@ -768,7 +768,7 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
>> >>      }
>> >>  }
>> >>  
>> >> -static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>> >> +static bool vfio_pci_load_rom(VFIOPCIDevice *vdev)
>> >>  {
>> >>      struct vfio_region_info *reg_info;
>> >>      uint64_t size;
>> >> @@ -778,7 +778,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>> >>      if (vfio_get_region_info(&vdev->vbasedev,
>> >>                               VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
>> >>          error_report("vfio: Error getting ROM info: %m");
>> >> -        return;
>> >> +        return false;
>> >>      }
>> >>  
>> >>      trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
>> >> @@ -797,7 +797,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>> >>          error_printf("Device option ROM contents are probably invalid "
>> >>                      "(check dmesg).\nSkip option ROM probe with rombar=0, "
>> >>                      "or load from file with romfile=\n");
>> >> -        return;
>> >> +        return false;
>> >>      }
>> >>  
>> >>      vdev->rom = g_malloc(size);
>> >> @@ -849,6 +849,8 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
>> >>              data[6] = -csum;
>> >>          }
>> >>      }
>> >> +
>> >> +    return true;
>> >>  }
>> >>  
>> >>  static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
>> >> @@ -863,8 +865,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)  
>>     {
>>         VFIOPCIDevice *vdev = opaque;
>>         union {
>>             uint8_t byte;
>>             uint16_t word;
>>             uint32_t dword;
>>             uint64_t qword;
>>         } val;
>> >>      uint64_t data = 0;
>> >>  
>> >>      /* Load the ROM lazily when the guest tries to read it */
>> >> -    if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
>> >> -        vfio_pci_load_rom(vdev);
>> >> +    if (unlikely(!vdev->rom && !vdev->rom_read_failed) &&
>> >> +        !vfio_pci_load_rom(vdev)) {
>> >> +        return 0;
>> >>      }
>> >>  
>> >>      memcpy(&val, vdev->rom + addr,  
>> >
>> > Looks like an obvious bug, until you look at the rest of this memcpy():
>> >
>> > memcpy(&val, vdev->rom + addr,
>> >            (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
>> >
>> > IOW, we'll do a zero sized memcpy() if rom_size is zero, so there's no
>> > risk of the concern identified in the commit log.  This patch is
>> > unnecessary.  Thanks,  
>> 
>> I'm blind: why does !vdev->rom imply !vdev->rom_size?
>
> See vfio_pci_load_rom(), rom_size and rom are set and allocated
> together.

What if vfio_pci_load_rom() isn't called, or returns before it sets
these guys?

>> Moreover, when MIN(size, vdev->rom_size - addr) < size, we seem to read
>> uninitialized data from @val:
>
> This is fixed in my patch
> https://lists.gnu.org/archive/html/qemu-devel/2020-03/msg02778.html

Yes.

>> 
>>         switch (size) {
>>         case 1:
>>             data = val.byte;
>>             break;
>>         case 2:
>>             data = le16_to_cpu(val.word);
>>             break;
>>         case 4:
>>             data = le32_to_cpu(val.dword);
>>             break;
>>         default:
>>             hw_error("vfio: unsupported read size, %d bytes\n", size);
>>             break;
>>         }
>> 
>>         trace_vfio_rom_read(vdev->vbasedev.name, addr, size, data);
>> 
>>         return data;
>>     }
>> 
>> Why is that okay?
>> 
>> Why do we initialize @data?
>
> Bug.  The switch was only added later (75bd0c7253f3) and we failed to
> catch it.  Prior to that we were initializing val and the memcpy() only
> overwrote it as necessary.  In any case, getting back garbage for the
> rom when there isn't one generally works ok since the chances of
> generating a proper rom signature are infinitesimal.  Clearly not what
> was intended though.
>
>> How can we get to the default case?  If we can get there, is hw_error()
>> really the right thing to do?  It almost never is...  If getting there
>> is the guest's fault, we need to tell it off the same way physical
>> hardware does.  If we should not ever get there (i.e. it's a QEMU bug),
>> then a plain abort() would be clearer.
>
> AFAIK this is relatively standard, if not somewhat paranoid, handling
> for a MemoryRegion ops callback.  The MemoryRegionOps code only allows
> certain size accesses, so it would effectively be an internal error to
> hit the default case, which seems to be not an uncommon use case of
> hw_error.  Thanks,

Using hw_error() for such programming errors is not helpful.  Everything
it adds to abort() is useless or misleading.

In fact, most uses of hw_error() are not helpful.

But you're going with the flow here.  I accept that.
Alex Williamson March 12, 2020, 2:07 p.m. UTC | #15
On Thu, 12 Mar 2020 06:50:30 +0100
Markus Armbruster <armbru@redhat.com> wrote:

> Alex Williamson <alex.williamson@redhat.com> writes:
> 
> > On Wed, 11 Mar 2020 08:04:28 +0100
> > Markus Armbruster <armbru@redhat.com> wrote:
> >  
> >> Alex Williamson <alex.williamson@redhat.com> writes:
> >>   
> >> > On Mon, 24 Feb 2020 14:42:17 +0800
> >> > "Longpeng(Mike)" <longpeng2@huawei.com> wrote:
> >> >    
> >> >> From: Longpeng <longpeng2@huawei.com>
> >> >> 
> >> >> vfio_pci_load_rom() maybe failed and then the vdev->rom is NULL in
> >> >> some situation (though I've not encountered yet), maybe we should
> >> >> avoid the VM abort.    
> >> 
> >> What "VM abort" exactly?  
> >
> > There is none because memcpy() does something sane when size is zero,
> > but to be ISO whatever spec compliant we shouldn't rely on that.
> >  
> >> >> 
> >> >> Signed-off-by: Longpeng <longpeng2@huawei.com>
> >> >> ---
> >> >>  hw/vfio/pci.c | 13 ++++++++-----
> >> >>  1 file changed, 8 insertions(+), 5 deletions(-)
> >> >> 
> >> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> >> index 5e75a95..ed798ae 100644
> >> >> --- a/hw/vfio/pci.c
> >> >> +++ b/hw/vfio/pci.c
> >> >> @@ -768,7 +768,7 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
> >> >>      }
> >> >>  }
> >> >>  
> >> >> -static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >> >> +static bool vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >> >>  {
> >> >>      struct vfio_region_info *reg_info;
> >> >>      uint64_t size;
> >> >> @@ -778,7 +778,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >> >>      if (vfio_get_region_info(&vdev->vbasedev,
> >> >>                               VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
> >> >>          error_report("vfio: Error getting ROM info: %m");
> >> >> -        return;
> >> >> +        return false;
> >> >>      }
> >> >>  
> >> >>      trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
> >> >> @@ -797,7 +797,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >> >>          error_printf("Device option ROM contents are probably invalid "
> >> >>                      "(check dmesg).\nSkip option ROM probe with rombar=0, "
> >> >>                      "or load from file with romfile=\n");
> >> >> -        return;
> >> >> +        return false;
> >> >>      }
> >> >>  
> >> >>      vdev->rom = g_malloc(size);
> >> >> @@ -849,6 +849,8 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> >> >>              data[6] = -csum;
> >> >>          }
> >> >>      }
> >> >> +
> >> >> +    return true;
> >> >>  }
> >> >>  
> >> >>  static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
> >> >> @@ -863,8 +865,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)    
> >>     {
> >>         VFIOPCIDevice *vdev = opaque;
> >>         union {
> >>             uint8_t byte;
> >>             uint16_t word;
> >>             uint32_t dword;
> >>             uint64_t qword;
> >>         } val;  
> >> >>      uint64_t data = 0;
> >> >>  
> >> >>      /* Load the ROM lazily when the guest tries to read it */
> >> >> -    if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
> >> >> -        vfio_pci_load_rom(vdev);
> >> >> +    if (unlikely(!vdev->rom && !vdev->rom_read_failed) &&
> >> >> +        !vfio_pci_load_rom(vdev)) {
> >> >> +        return 0;
> >> >>      }
> >> >>  
> >> >>      memcpy(&val, vdev->rom + addr,    
> >> >
> >> > Looks like an obvious bug, until you look at the rest of this memcpy():
> >> >
> >> > memcpy(&val, vdev->rom + addr,
> >> >            (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
> >> >
> >> > IOW, we'll do a zero sized memcpy() if rom_size is zero, so there's no
> >> > risk of the concern identified in the commit log.  This patch is
> >> > unnecessary.  Thanks,    
> >> 
> >> I'm blind: why does !vdev->rom imply !vdev->rom_size?  
> >
> > See vfio_pci_load_rom(), rom_size and rom are set and allocated
> > together.  
> 
> What if vfio_pci_load_rom() isn't called, or returns before it sets
> these guys?

vfio_pci_load_rom() is called from this read function if they're not
set and we haven't triggered a read failure.  They're zero allocated.
The intention is that any vfio_pci_load_rom() failure will leave these
unset and trigger a zero sized memcpy, or now skip the memcpy as in the
patch below.  Thanks,

Alex

> >> Moreover, when MIN(size, vdev->rom_size - addr) < size, we seem to read
> >> uninitialized data from @val:  
> >
> > This is fixed in my patch
> > https://lists.gnu.org/archive/html/qemu-devel/2020-03/msg02778.html  
> 
> Yes.
> 
> >> 
> >>         switch (size) {
> >>         case 1:
> >>             data = val.byte;
> >>             break;
> >>         case 2:
> >>             data = le16_to_cpu(val.word);
> >>             break;
> >>         case 4:
> >>             data = le32_to_cpu(val.dword);
> >>             break;
> >>         default:
> >>             hw_error("vfio: unsupported read size, %d bytes\n", size);
> >>             break;
> >>         }
> >> 
> >>         trace_vfio_rom_read(vdev->vbasedev.name, addr, size, data);
> >> 
> >>         return data;
> >>     }
> >> 
> >> Why is that okay?
> >> 
> >> Why do we initialize @data?  
> >
> > Bug.  The switch was only added later (75bd0c7253f3) and we failed to
> > catch it.  Prior to that we were initializing val and the memcpy() only
> > overwrote it as necessary.  In any case, getting back garbage for the
> > rom when there isn't one generally works ok since the chances of
> > generating a proper rom signature are infinitesimal.  Clearly not what
> > was intended though.
> >  
> >> How can we get to the default case?  If we can get there, is hw_error()
> >> really the right thing to do?  It almost never is...  If getting there
> >> is the guest's fault, we need to tell it off the same way physical
> >> hardware does.  If we should not ever get there (i.e. it's a QEMU bug),
> >> then a plain abort() would be clearer.  
> >
> > AFAIK this is relatively standard, if not somewhat paranoid, handling
> > for a MemoryRegion ops callback.  The MemoryRegionOps code only allows
> > certain size accesses, so it would effectively be an internal error to
> > hit the default case, which seems to be not an uncommon use case of
> > hw_error.  Thanks,  
> 
> Using hw_error() for such programming errors is not helpful.  Everything
> it adds to abort() is useless or misleading.
> 
> In fact, most uses of hw_error() are not helpful.
> 
> But you're going with the flow here.  I accept that.
diff mbox series

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 5e75a95..ed798ae 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -768,7 +768,7 @@  static void vfio_update_msi(VFIOPCIDevice *vdev)
     }
 }
 
-static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
+static bool vfio_pci_load_rom(VFIOPCIDevice *vdev)
 {
     struct vfio_region_info *reg_info;
     uint64_t size;
@@ -778,7 +778,7 @@  static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
     if (vfio_get_region_info(&vdev->vbasedev,
                              VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
         error_report("vfio: Error getting ROM info: %m");
-        return;
+        return false;
     }
 
     trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
@@ -797,7 +797,7 @@  static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
         error_printf("Device option ROM contents are probably invalid "
                     "(check dmesg).\nSkip option ROM probe with rombar=0, "
                     "or load from file with romfile=\n");
-        return;
+        return false;
     }
 
     vdev->rom = g_malloc(size);
@@ -849,6 +849,8 @@  static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
             data[6] = -csum;
         }
     }
+
+    return true;
 }
 
 static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
@@ -863,8 +865,9 @@  static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
     uint64_t data = 0;
 
     /* Load the ROM lazily when the guest tries to read it */
-    if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
-        vfio_pci_load_rom(vdev);
+    if (unlikely(!vdev->rom && !vdev->rom_read_failed) &&
+        !vfio_pci_load_rom(vdev)) {
+        return 0;
     }
 
     memcpy(&val, vdev->rom + addr,