diff mbox

[v13,09/13] add check reset mechanism when hotplug vfio device

Message ID 7e222e2840fe58fe26d3bd73f626c1da029ca981.1447231392.git.chen.fan.fnst@cn.fujitsu.com
State New
Headers show

Commit Message

Cao jin Nov. 11, 2015, 10:34 a.m. UTC
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Since we support multi-function hotplug. the function 0 indicate
the closure of the slot, so we have the chance to do the check.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
 hw/vfio/pci.c            | 19 +++++++++++++++++++
 hw/vfio/pci.h            |  2 ++
 include/hw/pci/pci_bus.h |  5 +++++
 4 files changed, 55 insertions(+)

Comments

Michael S. Tsirkin Nov. 12, 2015, 11:51 a.m. UTC | #1
On Wed, Nov 11, 2015 at 06:34:27PM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Since we support multi-function hotplug. the function 0 indicate
> the closure of the slot, so we have the chance to do the check.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
>  hw/vfio/pci.c            | 19 +++++++++++++++++++
>  hw/vfio/pci.h            |  2 ++
>  include/hw/pci/pci_bus.h |  5 +++++
>  4 files changed, 55 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 168b9cc..f6ca6ef 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
>      PCIBus *bus = PCI_BUS(qbus);
>  
>      vmstate_register(NULL, -1, &vmstate_pcibus, bus);
> +    notifier_with_return_list_init(&bus->hotplug_notifiers);
>  }
>  
>  static void pci_bus_unrealize(BusState *qbus, Error **errp)
> @@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
>      return bus->devices[devfn];
>  }
>  
> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
> +{
> +    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
> +}
> +
> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
> +{
> +    notifier_with_return_remove(notifier);
> +}
> +
> +static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
> +{
> +    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
> +                                            opaque);
> +}
> +
>  static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>  {
>      PCIDevice *pci_dev = (PCIDevice *)qdev;
> @@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>          pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>          return;
>      }
> +
> +    /*
> +     *  If the function is func 0, indicate the closure of the slot.
> +     *  signal the callback.
> +     */
> +    if (DEVICE(pci_dev)->hotplugged &&
> +        pci_get_function_0(pci_dev) == pci_dev &&
> +        pci_bus_hotplug_notifier(bus, pci_dev)) {
> +        error_setg(errp, "failed to hotplug function 0");
> +        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
> +        return;
> +    }

I don't understand why this is required in pci core.
PCI Device is already constructed anyway.
Just do the checks and call unrealize in vfio.

I also don't see why you are tying this to hotplug.
I would check each function as it's added.
But that's a vfio thing, if both you and Alex think
it's a good idea, fine by me.

>  }
>  
>  static void pci_default_realize(PCIDevice *dev, Error **errp)
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 31ffd44..e619998 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1990,6 +1990,19 @@ static int vfio_check_devices_host_bus_reset(void)
>      return 0;
>  }
>  
> +static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
> +{
> +    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, hotplug_notifier);
> +    PCIDevice *pci_dev = PCI_DEVICE(vdev);
> +    PCIDevice *pci_func0 = opaque;
> +
> +    if (pci_get_function_0(pci_dev) != pci_func0) {
> +        return 0;
> +    }
> +
> +    return vfio_check_host_bus_reset(vdev);
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
> @@ -2044,6 +2057,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>          return ret;
>      }
>  
> +    vdev->hotplug_notifier.notify = vfio_check_bus_reset;
> +    pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
> +
>      return 0;
>  
>  error:
> @@ -2919,6 +2935,9 @@ static void vfio_exitfn(PCIDevice *pdev)
>      vfio_unregister_req_notifier(vdev);
>      vfio_unregister_err_notifier(vdev);
>      pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> +        pci_bus_remove_hotplug_notifier(&vdev->hotplug_notifier);
> +    }
>      vfio_disable_interrupts(vdev);
>      if (vdev->intx.mmap_timer) {
>          timer_free(vdev->intx.mmap_timer);
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 59ae194..b385f07 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +
> +    NotifierWithReturn hotplug_notifier;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 403fec6..7812fa9 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -39,8 +39,13 @@ struct PCIBus {
>         Keep a count of the number of devices with raised IRQs.  */
>      int nirq;
>      int *irq_count;
> +
> +    NotifierWithReturnList hotplug_notifiers;
>  };
>  
> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify);
> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notify);
> +
>  typedef struct PCIBridgeWindows PCIBridgeWindows;
>  
>  /*
> -- 
> 1.9.3
Cao jin Nov. 13, 2015, 3:28 a.m. UTC | #2
On 11/12/2015 07:51 PM, Michael S. Tsirkin wrote:
> On Wed, Nov 11, 2015 at 06:34:27PM +0800, Cao jin wrote:
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Since we support multi-function hotplug. the function 0 indicate
>> the closure of the slot, so we have the chance to do the check.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
>>   hw/vfio/pci.c            | 19 +++++++++++++++++++
>>   hw/vfio/pci.h            |  2 ++
>>   include/hw/pci/pci_bus.h |  5 +++++
>>   4 files changed, 55 insertions(+)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 168b9cc..f6ca6ef 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
>>       PCIBus *bus = PCI_BUS(qbus);
>>
>>       vmstate_register(NULL, -1, &vmstate_pcibus, bus);
>> +    notifier_with_return_list_init(&bus->hotplug_notifiers);
>>   }
>>
>>   static void pci_bus_unrealize(BusState *qbus, Error **errp)
>> @@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
>>       return bus->devices[devfn];
>>   }
>>
>> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
>> +{
>> +    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
>> +}
>> +
>> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
>> +{
>> +    notifier_with_return_remove(notifier);
>> +}
>> +
>> +static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
>> +{
>> +    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
>> +                                            opaque);
>> +}
>> +
>>   static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>   {
>>       PCIDevice *pci_dev = (PCIDevice *)qdev;
>> @@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>           pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>>           return;
>>       }
>> +
>> +    /*
>> +     *  If the function is func 0, indicate the closure of the slot.
>> +     *  signal the callback.
>> +     */
>> +    if (DEVICE(pci_dev)->hotplugged &&
>> +        pci_get_function_0(pci_dev) == pci_dev &&
>> +        pci_bus_hotplug_notifier(bus, pci_dev)) {
>> +        error_setg(errp, "failed to hotplug function 0");
>> +        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>> +        return;
>> +    }
>
> I don't understand why this is required in pci core.
> PCI Device is already constructed anyway.
> Just do the checks and call unrealize in vfio.

Because when do multi-function hotplug, the function 0 on the pcie bus 
probably is not a vfio device. so we should trigger the check from pci 
core.

> I also don't see why you are tying this to hotplug.
> I would check each function as it's added.
> But that's a vfio thing, if both you and Alex think
> it's a good idea, fine by me.

The device is  initialized one by one no matter it is cold plugged or 
hot plugged, but for the vfio with aer that need to get depended devices 
required by bus reset, so need to make sure the reset depended devices 
are assigned to qemu, in vfio, there is a machine done callback to check 
the bus reset for boot up, so it also should be done in hotplug。

it looks little complicated, Alex, any idea?

>
>>   }
>>
>>   static void pci_default_realize(PCIDevice *dev, Error **errp)
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 31ffd44..e619998 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1990,6 +1990,19 @@ static int vfio_check_devices_host_bus_reset(void)
>>       return 0;
>>   }
>>
>> +static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
>> +{
>> +    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, hotplug_notifier);
>> +    PCIDevice *pci_dev = PCI_DEVICE(vdev);
>> +    PCIDevice *pci_func0 = opaque;
>> +
>> +    if (pci_get_function_0(pci_dev) != pci_func0) {
>> +        return 0;
>> +    }
>> +
>> +    return vfio_check_host_bus_reset(vdev);
>> +}
>> +
>>   static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>                             int pos, uint16_t size)
>>   {
>> @@ -2044,6 +2057,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>           return ret;
>>       }
>>
>> +    vdev->hotplug_notifier.notify = vfio_check_bus_reset;
>> +    pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
>> +
>>       return 0;
>>
>>   error:
>> @@ -2919,6 +2935,9 @@ static void vfio_exitfn(PCIDevice *pdev)
>>       vfio_unregister_req_notifier(vdev);
>>       vfio_unregister_err_notifier(vdev);
>>       pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
>> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
>> +        pci_bus_remove_hotplug_notifier(&vdev->hotplug_notifier);
>> +    }
>>       vfio_disable_interrupts(vdev);
>>       if (vdev->intx.mmap_timer) {
>>           timer_free(vdev->intx.mmap_timer);
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index 59ae194..b385f07 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
>>       bool no_kvm_intx;
>>       bool no_kvm_msi;
>>       bool no_kvm_msix;
>> +
>> +    NotifierWithReturn hotplug_notifier;
>>   } VFIOPCIDevice;
>>
>>   uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index 403fec6..7812fa9 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -39,8 +39,13 @@ struct PCIBus {
>>          Keep a count of the number of devices with raised IRQs.  */
>>       int nirq;
>>       int *irq_count;
>> +
>> +    NotifierWithReturnList hotplug_notifiers;
>>   };
>>
>> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify);
>> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notify);
>> +
>>   typedef struct PCIBridgeWindows PCIBridgeWindows;
>>
>>   /*
>> --
>> 1.9.3
> .
>
Alex Williamson Nov. 13, 2015, 9:04 p.m. UTC | #3
On Fri, 2015-11-13 at 11:28 +0800, Cao jin wrote:
> 
> On 11/12/2015 07:51 PM, Michael S. Tsirkin wrote:
> > On Wed, Nov 11, 2015 at 06:34:27PM +0800, Cao jin wrote:
> >> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >>
> >> Since we support multi-function hotplug. the function 0 indicate
> >> the closure of the slot, so we have the chance to do the check.
> >>
> >> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >> ---
> >>   hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
> >>   hw/vfio/pci.c            | 19 +++++++++++++++++++
> >>   hw/vfio/pci.h            |  2 ++
> >>   include/hw/pci/pci_bus.h |  5 +++++
> >>   4 files changed, 55 insertions(+)
> >>
> >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >> index 168b9cc..f6ca6ef 100644
> >> --- a/hw/pci/pci.c
> >> +++ b/hw/pci/pci.c
> >> @@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
> >>       PCIBus *bus = PCI_BUS(qbus);
> >>
> >>       vmstate_register(NULL, -1, &vmstate_pcibus, bus);
> >> +    notifier_with_return_list_init(&bus->hotplug_notifiers);
> >>   }
> >>
> >>   static void pci_bus_unrealize(BusState *qbus, Error **errp)
> >> @@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
> >>       return bus->devices[devfn];
> >>   }
> >>
> >> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
> >> +{
> >> +    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
> >> +}
> >> +
> >> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
> >> +{
> >> +    notifier_with_return_remove(notifier);
> >> +}
> >> +
> >> +static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
> >> +{
> >> +    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
> >> +                                            opaque);
> >> +}
> >> +
> >>   static void pci_qdev_realize(DeviceState *qdev, Error **errp)
> >>   {
> >>       PCIDevice *pci_dev = (PCIDevice *)qdev;
> >> @@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
> >>           pci_qdev_unrealize(DEVICE(pci_dev), NULL);
> >>           return;
> >>       }
> >> +
> >> +    /*
> >> +     *  If the function is func 0, indicate the closure of the slot.
> >> +     *  signal the callback.
> >> +     */
> >> +    if (DEVICE(pci_dev)->hotplugged &&
> >> +        pci_get_function_0(pci_dev) == pci_dev &&
> >> +        pci_bus_hotplug_notifier(bus, pci_dev)) {
> >> +        error_setg(errp, "failed to hotplug function 0");
> >> +        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
> >> +        return;
> >> +    }
> >
> > I don't understand why this is required in pci core.
> > PCI Device is already constructed anyway.
> > Just do the checks and call unrealize in vfio.
> 
> Because when do multi-function hotplug, the function 0 on the pcie bus 
> probably is not a vfio device. so we should trigger the check from pci 
> core.
> 
> > I also don't see why you are tying this to hotplug.
> > I would check each function as it's added.
> > But that's a vfio thing, if both you and Alex think
> > it's a good idea, fine by me.
> 
> The device is  initialized one by one no matter it is cold plugged or 
> hot plugged, but for the vfio with aer that need to get depended devices 
> required by bus reset, so need to make sure the reset depended devices 
> are assigned to qemu, in vfio, there is a machine done callback to check 
> the bus reset for boot up, so it also should be done in hotplug。
> 
> it looks little complicated, Alex, any idea?


So the problem is that to support AER we need to be able to do a bus
reset of the device, both in the virtual and physical spaces.  A
physical bus reset is likely to affect more than a single device since
we're often dealing with multifunction endpoints.  Those functions may
be considered isolated on the host due to ACS, but we cannot reset the
bus without affecting all of the functions.  Therefore, we need to test
whether we have a compatible setup, but it involves more than a single
device.  We cannot test each device as it is initialized because any
time more than one device is affected, and we haven't yet added the
other devices, we'll fail the test.

There are two separate cases where we need to solve this problem,
coldplug and hotplug.  Coldplug can be resolved by using the
machine-init done notifier to verify that our configuration is
compatible.  We have no requirements for the ordering of devices during
cold initialization.  For the hotplug case, we've defined that function
0 closes the slot, which provides an opportunity for us to do the same
verification.  However, function 0 is not necessarily a vfio-pci device.
We can create our own multifunction devices in the VM, where function 0
could be any type of pci device.  Thus vfio-pci cannot notify itself
when a slot is closed and due to the above mentioned problem, we cannot
verify as each device is added.

So, I don't really see a better way to solve the problem than what's
being proposed here.  Thanks,

Alex
chenfan Nov. 16, 2015, 10:18 a.m. UTC | #4
Hi Alex,

   Thanks for your detailed explanation.
   during my test, I found that maybe there was another problem in vfio 
driver,
I use a dual-port NIC which address are: 06:00.0 and 06:00.1 two functions.
then I use aer-inject to inject one error to one function like following:
AER
ID 0000:06:00.0
UNCOR_STATUS DLP
HEADER_LOG 0 1 2 3

here I boot qemu with one enable aer, one disable aer:
./x86_64-softmmu/qemu-system-x86_64 -M q35 -device 
ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1
  -device vfio-pci,host=06:00.1,bus=bridge1,addr=00.1
  -device 
vfio-pci,host=06:00.0,bus=bridge1,addr=00.0,aer=true,multifunction=on

so we expected that the error only sent to the vfio device with host 
address is 06:00.0,
but I found that all devices (06:00.0 , 06:00.1) receive the signal in 
qemu, which sent by vfio driver
in vfio_pci_aer_err_detected. then qemu stopped by the device with 
06:00.1 received the signal.
is that right?

Thanks,
Chen


On 11/14/2015 05:04 AM, Alex Williamson wrote:
> On Fri, 2015-11-13 at 11:28 +0800, Cao jin wrote:
>> On 11/12/2015 07:51 PM, Michael S. Tsirkin wrote:
>>> On Wed, Nov 11, 2015 at 06:34:27PM +0800, Cao jin wrote:
>>>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>>
>>>> Since we support multi-function hotplug. the function 0 indicate
>>>> the closure of the slot, so we have the chance to do the check.
>>>>
>>>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>> ---
>>>>    hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
>>>>    hw/vfio/pci.c            | 19 +++++++++++++++++++
>>>>    hw/vfio/pci.h            |  2 ++
>>>>    include/hw/pci/pci_bus.h |  5 +++++
>>>>    4 files changed, 55 insertions(+)
>>>>
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index 168b9cc..f6ca6ef 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
>>>>        PCIBus *bus = PCI_BUS(qbus);
>>>>
>>>>        vmstate_register(NULL, -1, &vmstate_pcibus, bus);
>>>> +    notifier_with_return_list_init(&bus->hotplug_notifiers);
>>>>    }
>>>>
>>>>    static void pci_bus_unrealize(BusState *qbus, Error **errp)
>>>> @@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
>>>>        return bus->devices[devfn];
>>>>    }
>>>>
>>>> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
>>>> +{
>>>> +    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
>>>> +}
>>>> +
>>>> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
>>>> +{
>>>> +    notifier_with_return_remove(notifier);
>>>> +}
>>>> +
>>>> +static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
>>>> +{
>>>> +    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
>>>> +                                            opaque);
>>>> +}
>>>> +
>>>>    static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>>    {
>>>>        PCIDevice *pci_dev = (PCIDevice *)qdev;
>>>> @@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>>            pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>>>>            return;
>>>>        }
>>>> +
>>>> +    /*
>>>> +     *  If the function is func 0, indicate the closure of the slot.
>>>> +     *  signal the callback.
>>>> +     */
>>>> +    if (DEVICE(pci_dev)->hotplugged &&
>>>> +        pci_get_function_0(pci_dev) == pci_dev &&
>>>> +        pci_bus_hotplug_notifier(bus, pci_dev)) {
>>>> +        error_setg(errp, "failed to hotplug function 0");
>>>> +        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>>>> +        return;
>>>> +    }
>>> I don't understand why this is required in pci core.
>>> PCI Device is already constructed anyway.
>>> Just do the checks and call unrealize in vfio.
>> Because when do multi-function hotplug, the function 0 on the pcie bus
>> probably is not a vfio device. so we should trigger the check from pci
>> core.
>>
>>> I also don't see why you are tying this to hotplug.
>>> I would check each function as it's added.
>>> But that's a vfio thing, if both you and Alex think
>>> it's a good idea, fine by me.
>> The device is  initialized one by one no matter it is cold plugged or
>> hot plugged, but for the vfio with aer that need to get depended devices
>> required by bus reset, so need to make sure the reset depended devices
>> are assigned to qemu, in vfio, there is a machine done callback to check
>> the bus reset for boot up, so it also should be done in hotplug。
>>
>> it looks little complicated, Alex, any idea?
>
> So the problem is that to support AER we need to be able to do a bus
> reset of the device, both in the virtual and physical spaces.  A
> physical bus reset is likely to affect more than a single device since
> we're often dealing with multifunction endpoints.  Those functions may
> be considered isolated on the host due to ACS, but we cannot reset the
> bus without affecting all of the functions.  Therefore, we need to test
> whether we have a compatible setup, but it involves more than a single
> device.  We cannot test each device as it is initialized because any
> time more than one device is affected, and we haven't yet added the
> other devices, we'll fail the test.
>
> There are two separate cases where we need to solve this problem,
> coldplug and hotplug.  Coldplug can be resolved by using the
> machine-init done notifier to verify that our configuration is
> compatible.  We have no requirements for the ordering of devices during
> cold initialization.  For the hotplug case, we've defined that function
> 0 closes the slot, which provides an opportunity for us to do the same
> verification.  However, function 0 is not necessarily a vfio-pci device.
> We can create our own multifunction devices in the VM, where function 0
> could be any type of pci device.  Thus vfio-pci cannot notify itself
> when a slot is closed and due to the above mentioned problem, we cannot
> verify as each device is added.
>
> So, I don't really see a better way to solve the problem than what's
> being proposed here.  Thanks,
>
> Alex
>
> .
>
Alex Williamson Nov. 16, 2015, 4:05 p.m. UTC | #5
On Mon, 2015-11-16 at 18:18 +0800, Chen Fan wrote:
> Hi Alex,
> 
>    Thanks for your detailed explanation.
>    during my test, I found that maybe there was another problem in vfio 
> driver,
> I use a dual-port NIC which address are: 06:00.0 and 06:00.1 two functions.
> then I use aer-inject to inject one error to one function like following:
> AER
> ID 0000:06:00.0
> UNCOR_STATUS DLP
> HEADER_LOG 0 1 2 3
> 
> here I boot qemu with one enable aer, one disable aer:
> ./x86_64-softmmu/qemu-system-x86_64 -M q35 -device 
> ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1
>   -device vfio-pci,host=06:00.1,bus=bridge1,addr=00.1
>   -device 
> vfio-pci,host=06:00.0,bus=bridge1,addr=00.0,aer=true,multifunction=on
> 
> so we expected that the error only sent to the vfio device with host 
> address is 06:00.0,
> but I found that all devices (06:00.0 , 06:00.1) receive the signal in 
> qemu, which sent by vfio driver
> in vfio_pci_aer_err_detected. then qemu stopped by the device with 
> 06:00.1 received the signal.
> is that right?

You would need to know whether the response for the injected AER affects
all devices on the link or is isolated to the function specified.  VFIO
is just a passthrough for pci_error_handlers, so if error_detected is
getting called for each host device, it's going to signal each device to
the user.  Thanks,

Alex
chenfan Nov. 17, 2015, 2:48 a.m. UTC | #6
On 11/17/2015 12:05 AM, Alex Williamson wrote:
> On Mon, 2015-11-16 at 18:18 +0800, Chen Fan wrote:
>> Hi Alex,
>>
>>     Thanks for your detailed explanation.
>>     during my test, I found that maybe there was another problem in vfio
>> driver,
>> I use a dual-port NIC which address are: 06:00.0 and 06:00.1 two functions.
>> then I use aer-inject to inject one error to one function like following:
>> AER
>> ID 0000:06:00.0
>> UNCOR_STATUS DLP
>> HEADER_LOG 0 1 2 3
>>
>> here I boot qemu with one enable aer, one disable aer:
>> ./x86_64-softmmu/qemu-system-x86_64 -M q35 -device
>> ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1
>>    -device vfio-pci,host=06:00.1,bus=bridge1,addr=00.1
>>    -device
>> vfio-pci,host=06:00.0,bus=bridge1,addr=00.0,aer=true,multifunction=on
>>
>> so we expected that the error only sent to the vfio device with host
>> address is 06:00.0,
>> but I found that all devices (06:00.0 , 06:00.1) receive the signal in
>> qemu, which sent by vfio driver
>> in vfio_pci_aer_err_detected. then qemu stopped by the device with
>> 06:00.1 received the signal.
>> is that right?
> You would need to know whether the response for the injected AER affects
> all devices on the link or is isolated to the function specified.  VFIO
> is just a passthrough for pci_error_handlers, so if error_detected is
> getting called for each host device, it's going to signal each device to
> the user.  Thanks,
I saw that in broadcast error message, if the error is reported by an 
end point,
aer driver would broadcast the error to all functions under the upstream 
link of the end point.
so here in qemu, I think we should enable AER for all functions in one 
endpoint.

Thanks,
Chen


> Alex
>
> .
>
diff mbox

Patch

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 168b9cc..f6ca6ef 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -81,6 +81,7 @@  static void pci_bus_realize(BusState *qbus, Error **errp)
     PCIBus *bus = PCI_BUS(qbus);
 
     vmstate_register(NULL, -1, &vmstate_pcibus, bus);
+    notifier_with_return_list_init(&bus->hotplug_notifiers);
 }
 
 static void pci_bus_unrealize(BusState *qbus, Error **errp)
@@ -1835,6 +1836,22 @@  PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
     return bus->devices[devfn];
 }
 
+void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
+{
+    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
+}
+
+void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
+{
+    notifier_with_return_remove(notifier);
+}
+
+static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
+{
+    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
+                                            opaque);
+}
+
 static void pci_qdev_realize(DeviceState *qdev, Error **errp)
 {
     PCIDevice *pci_dev = (PCIDevice *)qdev;
@@ -1877,6 +1894,18 @@  static void pci_qdev_realize(DeviceState *qdev, Error **errp)
         pci_qdev_unrealize(DEVICE(pci_dev), NULL);
         return;
     }
+
+    /*
+     *  If the function is func 0, indicate the closure of the slot.
+     *  signal the callback.
+     */
+    if (DEVICE(pci_dev)->hotplugged &&
+        pci_get_function_0(pci_dev) == pci_dev &&
+        pci_bus_hotplug_notifier(bus, pci_dev)) {
+        error_setg(errp, "failed to hotplug function 0");
+        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+        return;
+    }
 }
 
 static void pci_default_realize(PCIDevice *dev, Error **errp)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 31ffd44..e619998 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1990,6 +1990,19 @@  static int vfio_check_devices_host_bus_reset(void)
     return 0;
 }
 
+static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
+{
+    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, hotplug_notifier);
+    PCIDevice *pci_dev = PCI_DEVICE(vdev);
+    PCIDevice *pci_func0 = opaque;
+
+    if (pci_get_function_0(pci_dev) != pci_func0) {
+        return 0;
+    }
+
+    return vfio_check_host_bus_reset(vdev);
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -2044,6 +2057,9 @@  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
         return ret;
     }
 
+    vdev->hotplug_notifier.notify = vfio_check_bus_reset;
+    pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
+
     return 0;
 
 error:
@@ -2919,6 +2935,9 @@  static void vfio_exitfn(PCIDevice *pdev)
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+        pci_bus_remove_hotplug_notifier(&vdev->hotplug_notifier);
+    }
     vfio_disable_interrupts(vdev);
     if (vdev->intx.mmap_timer) {
         timer_free(vdev->intx.mmap_timer);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 59ae194..b385f07 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -142,6 +142,8 @@  typedef struct VFIOPCIDevice {
     bool no_kvm_intx;
     bool no_kvm_msi;
     bool no_kvm_msix;
+
+    NotifierWithReturn hotplug_notifier;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 403fec6..7812fa9 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -39,8 +39,13 @@  struct PCIBus {
        Keep a count of the number of devices with raised IRQs.  */
     int nirq;
     int *irq_count;
+
+    NotifierWithReturnList hotplug_notifiers;
 };
 
+void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify);
+void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notify);
+
 typedef struct PCIBridgeWindows PCIBridgeWindows;
 
 /*