Message ID | d3b4848742510cb233f17e2ba94acde0550db3c9.1447748073.git.chen.fan.fnst@cn.fujitsu.com |
---|---|
State | New |
Headers | show |
On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote: > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > when init vfio devices done, we should test all the devices supported > aer whether conflict with others. For each one, get the hot reset > info for the affected device list. For each affected device, all > should attach to the VM and on/below the same bus. also, we should test > all of the non-AER supporting vfio-pci devices on or below the target > bus to verify they have a reset mechanism. > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > --- > hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > hw/vfio/pci.h | 1 + > 2 files changed, 230 insertions(+), 7 deletions(-) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index d00b0e4..6926dcc 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) > return 0; > } > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1, > + PCIHostDeviceAddress *host2) > +{ > + return (host1->domain == host2->domain && host1->bus == host2->bus && > + host1->slot == host2->slot); > +} > + > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > + PCIHostDeviceAddress *host2) > +{ > + return (vfio_pci_host_slot_match(host1, host2) && > + host1->function == host2->function); > +} > + > +struct VFIODeviceFind { > + PCIDevice *pdev; > + bool found; > +}; > + > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev, > + void *opaque) > +{ > + DeviceState *dev = DEVICE(pdev); > + DeviceClass *dc = DEVICE_GET_CLASS(dev); > + VFIOPCIDevice *vdev; > + struct VFIODeviceFind *find = opaque; > + > + if (find->found) { > + return; > + } > + > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > + if (!dc->reset) { > + goto found; > + } > + return; > + } > + vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && > + !vdev->vbasedev.reset_works) { > + goto found; > + } > + > + return; > +found: > + find->pdev = pdev; > + find->found = true; > +} > + > +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque) > +{ > + struct VFIODeviceFind *find = opaque; > + > + if (find->found) { > + return; > + } > + > + if (pdev == find->pdev) { > + find->found = true; > + } > +} > + > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > +{ > + PCIBus *bus = vdev->pdev.bus; > + struct vfio_pci_hot_reset_info *info = NULL; > + struct vfio_pci_dependent_device *devices; > + VFIOGroup *group; > + struct VFIODeviceFind find; > + int ret, i; > + > + ret = vfio_get_hot_reset_info(vdev, &info); > + if (ret) { > + error_report("vfio: Cannot enable AER for device %s," > + " device does not support hot reset.", > + vdev->vbasedev.name); > + goto out; > + } > + > + /* List all affected devices by bus reset */ > + devices = &info->devices[0]; > + > + /* Verify that we have all the groups required */ > + for (i = 0; i < info->count; i++) { > + PCIHostDeviceAddress host; > + VFIOPCIDevice *tmp; > + VFIODevice *vbasedev_iter; > + bool found = false; > + > + host.domain = devices[i].segment; > + host.bus = devices[i].bus; > + host.slot = PCI_SLOT(devices[i].devfn); > + host.function = PCI_FUNC(devices[i].devfn); > + > + /* Skip the current device */ > + if (vfio_pci_host_match(&host, &vdev->host)) { > + continue; > + } > + > + /* Ensure we own the group of the affected device */ > + QLIST_FOREACH(group, &vfio_group_list, next) { > + if (group->groupid == devices[i].group_id) { > + break; > + } > + } > + > + if (!group) { > + error_report("vfio: Cannot enable AER for device %s, " > + "depends on group %d which is not owned.", > + vdev->vbasedev.name, devices[i].group_id); > + ret = -1; > + goto out; > + } > + > + /* Ensure affected devices for reset on/blow the bus */ > + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { > + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); > + if (vfio_pci_host_match(&host, &tmp->host)) { > + PCIDevice *pci = PCI_DEVICE(tmp); > + > + /* > + * For multifunction device, due to vfio driver signal all > + * functions under the upstream link of the end point. here > + * we validate all functions whether enable AER. > + */ > + if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) && > + !(tmp->features & VFIO_FEATURE_ENABLE_AER)) { > + error_report("vfio: Cannot enable AER for device %s, on same slot" > + " the dependent device %s which does not enable AER.", > + vdev->vbasedev.name, tmp->vbasedev.name); > + ret = -1; > + goto out; > + } It took me a while to understand this code block, so I've updated the comment to read as follows: /* * AER errors may be broadcast to all functions of a multi- * function endpoint. If any of those sibling functions are * also assigned, they need to have AER enabled or else an * error may continue to cause a vm_stop condition. IOW, * AER setup of this function would be pointless. */ Does that match your intention? > + > + find.pdev = pci; > + find.found = false; > + pci_for_each_device(bus, pci_bus_num(bus), > + device_find, &find); > + if (!find.found) { > + error_report("vfio: Cannot enable AER for device %s, " > + "the dependent device %s is not under the same bus", > + vdev->vbasedev.name, tmp->vbasedev.name); > + ret = -1; > + goto out; > + } > + found = true; > + break; > + } > + } > + > + /* Ensure all affected devices assigned to VM */ > + if (!found) { > + error_report("vfio: Cannot enable AER for device %s, " > + "the dependent device %04x:%02x:%02x.%x " > + "is not assigned to VM.", > + vdev->vbasedev.name, host.domain, host.bus, > + host.slot, host.function); > + ret = -1; > + goto out; > + } > + } > + > + /* > + * Check the all pci devices on or below the target bus > + * have a reset mechanism at least. > + */ > + find.pdev = NULL; > + find.found = false; > + pci_for_each_device(bus, pci_bus_num(bus), > + vfio_check_device_noreset, &find); > + if (find.found) { > + error_report("vfio: Cannot enable AER for device %s, " > + "the affected device %s does not have a reset mechanism.", > + vdev->vbasedev.name, find.pdev->name); > + ret = -1; > + goto out; > + } > + > + ret = 0; > +out: > + g_free(info); > + return ret; > +} > + > +static int vfio_check_devices_host_bus_reset(void) > +{ > + VFIOGroup *group; > + VFIODevice *vbasedev; > + VFIOPCIDevice *vdev; > + > + /* Check All vfio-pci devices if have bus reset capability */ > + QLIST_FOREACH(group, &vfio_group_list, next) { > + QLIST_FOREACH(vbasedev, &group->device_list, next) { > + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > + if ((vdev->features & VFIO_FEATURE_ENABLE_AER) && > + vfio_check_host_bus_reset(vdev)) { > + return -1; > + } > + } > + } > + > + return 0; > +} > + > static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver, > int pos, uint16_t size) > { > @@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) > vfio_intx_enable(vdev); > } > > -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > - PCIHostDeviceAddress *host2) > -{ > - return (host1->domain == host2->domain && host1->bus == host2->bus && > - host1->slot == host2->slot && host1->function == host2->function); > -} > - > static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) > { > VFIOGroup *group; > @@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) > vdev->req_enabled = false; > } > > +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused) > +{ > + int ret; > + > + ret = vfio_check_devices_host_bus_reset(); > + if (ret) { > + exit(1); > + } > +} > + > +static Notifier machine_notifier = { > + .notify = vfio_pci_machine_done_notify, > +}; > + > static int vfio_initfn(PCIDevice *pdev) > { > VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > @@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = { > static void register_vfio_pci_dev_type(void) > { > type_register_static(&vfio_pci_dev_info); > + /* > + * Register notifier when machine init is done, since we need > + * check the configration manner after all vfio device are inited. > + */ > + qemu_add_machine_init_done_notifier(&machine_notifier); > } > > type_init(register_vfio_pci_dev_type) > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index 48c1f69..59ae194 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -15,6 +15,7 @@ > #include "qemu-common.h" > #include "exec/memory.h" > #include "hw/pci/pci.h" > +#include "hw/pci/pci_bus.h" > #include "hw/pci/pci_bridge.h" > #include "hw/vfio/vfio-common.h" > #include "qemu/event_notifier.h"
On 12/18/2015 04:32 AM, Alex Williamson wrote: > On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote: >> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com> >> >> when init vfio devices done, we should test all the devices supported >> aer whether conflict with others. For each one, get the hot reset >> info for the affected device list. For each affected device, all >> should attach to the VM and on/below the same bus. also, we should test >> all of the non-AER supporting vfio-pci devices on or below the target >> bus to verify they have a reset mechanism. >> >> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com> >> --- >> hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- >> hw/vfio/pci.h | 1 + >> 2 files changed, 230 insertions(+), 7 deletions(-) >> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index d00b0e4..6926dcc 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) >> return 0; >> } >> >> +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1, >> + PCIHostDeviceAddress *host2) >> +{ >> + return (host1->domain == host2->domain && host1->bus == host2->bus && >> + host1->slot == host2->slot); >> +} >> + >> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, >> + PCIHostDeviceAddress *host2) >> +{ >> + return (vfio_pci_host_slot_match(host1, host2) && >> + host1->function == host2->function); >> +} >> + >> +struct VFIODeviceFind { >> + PCIDevice *pdev; >> + bool found; >> +}; >> + >> +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev, >> + void *opaque) >> +{ >> + DeviceState *dev = DEVICE(pdev); >> + DeviceClass *dc = DEVICE_GET_CLASS(dev); >> + VFIOPCIDevice *vdev; >> + struct VFIODeviceFind *find = opaque; >> + >> + if (find->found) { >> + return; >> + } >> + >> + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { >> + if (!dc->reset) { >> + goto found; >> + } >> + return; >> + } >> + vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); >> + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && >> + !vdev->vbasedev.reset_works) { >> + goto found; >> + } >> + >> + return; >> +found: >> + find->pdev = pdev; >> + find->found = true; >> +} >> + >> +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque) >> +{ >> + struct VFIODeviceFind *find = opaque; >> + >> + if (find->found) { >> + return; >> + } >> + >> + if (pdev == find->pdev) { >> + find->found = true; >> + } >> +} >> + >> +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) >> +{ >> + PCIBus *bus = vdev->pdev.bus; >> + struct vfio_pci_hot_reset_info *info = NULL; >> + struct vfio_pci_dependent_device *devices; >> + VFIOGroup *group; >> + struct VFIODeviceFind find; >> + int ret, i; >> + >> + ret = vfio_get_hot_reset_info(vdev, &info); >> + if (ret) { >> + error_report("vfio: Cannot enable AER for device %s," >> + " device does not support hot reset.", >> + vdev->vbasedev.name); >> + goto out; >> + } >> + >> + /* List all affected devices by bus reset */ >> + devices = &info->devices[0]; >> + >> + /* Verify that we have all the groups required */ >> + for (i = 0; i < info->count; i++) { >> + PCIHostDeviceAddress host; >> + VFIOPCIDevice *tmp; >> + VFIODevice *vbasedev_iter; >> + bool found = false; >> + >> + host.domain = devices[i].segment; >> + host.bus = devices[i].bus; >> + host.slot = PCI_SLOT(devices[i].devfn); >> + host.function = PCI_FUNC(devices[i].devfn); >> + >> + /* Skip the current device */ >> + if (vfio_pci_host_match(&host, &vdev->host)) { >> + continue; >> + } >> + >> + /* Ensure we own the group of the affected device */ >> + QLIST_FOREACH(group, &vfio_group_list, next) { >> + if (group->groupid == devices[i].group_id) { >> + break; >> + } >> + } >> + >> + if (!group) { >> + error_report("vfio: Cannot enable AER for device %s, " >> + "depends on group %d which is not owned.", >> + vdev->vbasedev.name, devices[i].group_id); >> + ret = -1; >> + goto out; >> + } >> + >> + /* Ensure affected devices for reset on/blow the bus */ >> + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { >> + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { >> + continue; >> + } >> + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); >> + if (vfio_pci_host_match(&host, &tmp->host)) { >> + PCIDevice *pci = PCI_DEVICE(tmp); >> + >> + /* >> + * For multifunction device, due to vfio driver signal all >> + * functions under the upstream link of the end point. here >> + * we validate all functions whether enable AER. >> + */ >> + if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) && >> + !(tmp->features & VFIO_FEATURE_ENABLE_AER)) { >> + error_report("vfio: Cannot enable AER for device %s, on same slot" >> + " the dependent device %s which does not enable AER.", >> + vdev->vbasedev.name, tmp->vbasedev.name); >> + ret = -1; >> + goto out; >> + } > It took me a while to understand this code block, so I've updated the > comment to read as follows: > > /* > * AER errors may be broadcast to all functions of a multi- > * function endpoint. If any of those sibling functions are > * also assigned, they need to have AER enabled or else an > * error may continue to cause a vm_stop condition. IOW, > * AER setup of this function would be pointless. > */ > > Does that match your intention? Yes, this is the code's mean. your explanation is much better, I will update the common in next version. Thanks, Chen > >> + >> + find.pdev = pci; >> + find.found = false; >> + pci_for_each_device(bus, pci_bus_num(bus), >> + device_find, &find); >> + if (!find.found) { >> + error_report("vfio: Cannot enable AER for device %s, " >> + "the dependent device %s is not under the same bus", >> + vdev->vbasedev.name, tmp->vbasedev.name); >> + ret = -1; >> + goto out; >> + } >> + found = true; >> + break; >> + } >> + } >> + >> + /* Ensure all affected devices assigned to VM */ >> + if (!found) { >> + error_report("vfio: Cannot enable AER for device %s, " >> + "the dependent device %04x:%02x:%02x.%x " >> + "is not assigned to VM.", >> + vdev->vbasedev.name, host.domain, host.bus, >> + host.slot, host.function); >> + ret = -1; >> + goto out; >> + } >> + } >> + >> + /* >> + * Check the all pci devices on or below the target bus >> + * have a reset mechanism at least. >> + */ >> + find.pdev = NULL; >> + find.found = false; >> + pci_for_each_device(bus, pci_bus_num(bus), >> + vfio_check_device_noreset, &find); >> + if (find.found) { >> + error_report("vfio: Cannot enable AER for device %s, " >> + "the affected device %s does not have a reset mechanism.", >> + vdev->vbasedev.name, find.pdev->name); >> + ret = -1; >> + goto out; >> + } >> + >> + ret = 0; >> +out: >> + g_free(info); >> + return ret; >> +} >> + >> +static int vfio_check_devices_host_bus_reset(void) >> +{ >> + VFIOGroup *group; >> + VFIODevice *vbasedev; >> + VFIOPCIDevice *vdev; >> + >> + /* Check All vfio-pci devices if have bus reset capability */ >> + QLIST_FOREACH(group, &vfio_group_list, next) { >> + QLIST_FOREACH(vbasedev, &group->device_list, next) { >> + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { >> + continue; >> + } >> + vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); >> + if ((vdev->features & VFIO_FEATURE_ENABLE_AER) && >> + vfio_check_host_bus_reset(vdev)) { >> + return -1; >> + } >> + } >> + } >> + >> + return 0; >> +} >> + >> static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver, >> int pos, uint16_t size) >> { >> @@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) >> vfio_intx_enable(vdev); >> } >> >> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, >> - PCIHostDeviceAddress *host2) >> -{ >> - return (host1->domain == host2->domain && host1->bus == host2->bus && >> - host1->slot == host2->slot && host1->function == host2->function); >> -} >> - >> static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) >> { >> VFIOGroup *group; >> @@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) >> vdev->req_enabled = false; >> } >> >> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused) >> +{ >> + int ret; >> + >> + ret = vfio_check_devices_host_bus_reset(); >> + if (ret) { >> + exit(1); >> + } >> +} >> + >> +static Notifier machine_notifier = { >> + .notify = vfio_pci_machine_done_notify, >> +}; >> + >> static int vfio_initfn(PCIDevice *pdev) >> { >> VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); >> @@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = { >> static void register_vfio_pci_dev_type(void) >> { >> type_register_static(&vfio_pci_dev_info); >> + /* >> + * Register notifier when machine init is done, since we need >> + * check the configration manner after all vfio device are inited. >> + */ >> + qemu_add_machine_init_done_notifier(&machine_notifier); >> } >> >> type_init(register_vfio_pci_dev_type) >> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h >> index 48c1f69..59ae194 100644 >> --- a/hw/vfio/pci.h >> +++ b/hw/vfio/pci.h >> @@ -15,6 +15,7 @@ >> #include "qemu-common.h" >> #include "exec/memory.h" >> #include "hw/pci/pci.h" >> +#include "hw/pci/pci_bus.h" >> #include "hw/pci/pci_bridge.h" >> #include "hw/vfio/vfio-common.h" >> #include "qemu/event_notifier.h" > > > . >
On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote: > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > when init vfio devices done, we should test all the devices supported > aer whether conflict with others. For each one, get the hot reset > info for the affected device list. For each affected device, all > should attach to the VM and on/below the same bus. also, we should test > all of the non-AER supporting vfio-pci devices on or below the target > bus to verify they have a reset mechanism. > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > --- > hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > hw/vfio/pci.h | 1 + > 2 files changed, 230 insertions(+), 7 deletions(-) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index d00b0e4..6926dcc 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) > return 0; > } > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1, > + PCIHostDeviceAddress *host2) > +{ > + return (host1->domain == host2->domain && host1->bus == host2->bus && > + host1->slot == host2->slot); > +} > + > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > + PCIHostDeviceAddress *host2) > +{ > + return (vfio_pci_host_slot_match(host1, host2) && > + host1->function == host2->function); > +} > + > +struct VFIODeviceFind { > + PCIDevice *pdev; > + bool found; > +}; > + > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev, > + void *opaque) > +{ > + DeviceState *dev = DEVICE(pdev); > + DeviceClass *dc = DEVICE_GET_CLASS(dev); > + VFIOPCIDevice *vdev; > + struct VFIODeviceFind *find = opaque; > + > + if (find->found) { > + return; > + } > + > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > + if (!dc->reset) { > + goto found; > + } > + return; > + } > + vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && > + !vdev->vbasedev.reset_works) { > + goto found; > + } > + > + return; > +found: > + find->pdev = pdev; > + find->found = true; > +} > + > +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque) > +{ > + struct VFIODeviceFind *find = opaque; > + > + if (find->found) { > + return; > + } > + > + if (pdev == find->pdev) { > + find->found = true; > + } > +} > + > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > +{ > + PCIBus *bus = vdev->pdev.bus; > + struct vfio_pci_hot_reset_info *info = NULL; > + struct vfio_pci_dependent_device *devices; > + VFIOGroup *group; > + struct VFIODeviceFind find; > + int ret, i; > + > + ret = vfio_get_hot_reset_info(vdev, &info); > + if (ret) { > + error_report("vfio: Cannot enable AER for device %s," > + " device does not support hot reset.", > + vdev->vbasedev.name); > + goto out; > + } > + > + /* List all affected devices by bus reset */ > + devices = &info->devices[0]; > + > + /* Verify that we have all the groups required */ > + for (i = 0; i < info->count; i++) { > + PCIHostDeviceAddress host; > + VFIOPCIDevice *tmp; > + VFIODevice *vbasedev_iter; > + bool found = false; > + > + host.domain = devices[i].segment; > + host.bus = devices[i].bus; > + host.slot = PCI_SLOT(devices[i].devfn); > + host.function = PCI_FUNC(devices[i].devfn); > + > + /* Skip the current device */ > + if (vfio_pci_host_match(&host, &vdev->host)) { > + continue; > + } > + > + /* Ensure we own the group of the affected device */ > + QLIST_FOREACH(group, &vfio_group_list, next) { > + if (group->groupid == devices[i].group_id) { > + break; > + } > + } > + > + if (!group) { > + error_report("vfio: Cannot enable AER for device %s, " > + "depends on group %d which is not owned.", > + vdev->vbasedev.name, devices[i].group_id); > + ret = -1; > + goto out; > + } > + > + /* Ensure affected devices for reset on/blow the bus */ > + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { > + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); > + if (vfio_pci_host_match(&host, &tmp->host)) { > + PCIDevice *pci = PCI_DEVICE(tmp); > + > + /* > + * For multifunction device, due to vfio driver signal all > + * functions under the upstream link of the end point. here > + * we validate all functions whether enable AER. > + */ > + if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) && > + !(tmp->features & VFIO_FEATURE_ENABLE_AER)) { > + error_report("vfio: Cannot enable AER for device %s, on same slot" > + " the dependent device %s which does not enable AER.", > + vdev->vbasedev.name, tmp->vbasedev.name); > + ret = -1; > + goto out; > + } > + > + find.pdev = pci; > + find.found = false; > + pci_for_each_device(bus, pci_bus_num(bus), > + device_find, &find); > + if (!find.found) { > + error_report("vfio: Cannot enable AER for device %s, " > + "the dependent device %s is not under the same bus", > + vdev->vbasedev.name, tmp->vbasedev.name); > + ret = -1; > + goto out; > + } > + found = true; > + break; > + } > + } > + > + /* Ensure all affected devices assigned to VM */ I am puzzled. Does not kernel enforce this already? If not it's a security problem. If yes why does userspace need to check this? > + if (!found) { > + error_report("vfio: Cannot enable AER for device %s, " > + "the dependent device %04x:%02x:%02x.%x " > + "is not assigned to VM.", > + vdev->vbasedev.name, host.domain, host.bus, > + host.slot, host.function); > + ret = -1; > + goto out; > + } > + } > + > + /* > + * Check the all pci devices on or below the target bus > + * have a reset mechanism at least. > + */ > + find.pdev = NULL; > + find.found = false; > + pci_for_each_device(bus, pci_bus_num(bus), > + vfio_check_device_noreset, &find); > + if (find.found) { > + error_report("vfio: Cannot enable AER for device %s, " > + "the affected device %s does not have a reset mechanism.", > + vdev->vbasedev.name, find.pdev->name); > + ret = -1; > + goto out; > + } > + > + ret = 0; > +out: > + g_free(info); > + return ret; > +} > + > +static int vfio_check_devices_host_bus_reset(void) > +{ > + VFIOGroup *group; > + VFIODevice *vbasedev; > + VFIOPCIDevice *vdev; > + > + /* Check All vfio-pci devices if have bus reset capability */ > + QLIST_FOREACH(group, &vfio_group_list, next) { > + QLIST_FOREACH(vbasedev, &group->device_list, next) { > + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > + if ((vdev->features & VFIO_FEATURE_ENABLE_AER) && > + vfio_check_host_bus_reset(vdev)) { > + return -1; > + } > + } > + } > + > + return 0; > +} > + > static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver, > int pos, uint16_t size) > { > @@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) > vfio_intx_enable(vdev); > } > > -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > - PCIHostDeviceAddress *host2) > -{ > - return (host1->domain == host2->domain && host1->bus == host2->bus && > - host1->slot == host2->slot && host1->function == host2->function); > -} > - > static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) > { > VFIOGroup *group; > @@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) > vdev->req_enabled = false; > } > > +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused) > +{ > + int ret; > + > + ret = vfio_check_devices_host_bus_reset(); > + if (ret) { > + exit(1); > + } > +} > + > +static Notifier machine_notifier = { > + .notify = vfio_pci_machine_done_notify, > +}; > + > static int vfio_initfn(PCIDevice *pdev) > { > VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > @@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = { > static void register_vfio_pci_dev_type(void) > { > type_register_static(&vfio_pci_dev_info); > + /* > + * Register notifier when machine init is done, since we need > + * check the configration manner after all vfio device are inited. > + */ > + qemu_add_machine_init_done_notifier(&machine_notifier); > } > > type_init(register_vfio_pci_dev_type) > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index 48c1f69..59ae194 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -15,6 +15,7 @@ > #include "qemu-common.h" > #include "exec/memory.h" > #include "hw/pci/pci.h" > +#include "hw/pci/pci_bus.h" > #include "hw/pci/pci_bridge.h" > #include "hw/vfio/vfio-common.h" > #include "qemu/event_notifier.h" > -- > 1.9.3 > >
On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote: > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote: > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > > when init vfio devices done, we should test all the devices > > supported > > aer whether conflict with others. For each one, get the hot reset > > info for the affected device list. For each affected device, all > > should attach to the VM and on/below the same bus. also, we should > > test > > all of the non-AER supporting vfio-pci devices on or below the > > target > > bus to verify they have a reset mechanism. > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > --- > > hw/vfio/pci.c | 236 > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > > hw/vfio/pci.h | 1 + > > 2 files changed, 230 insertions(+), 7 deletions(-) > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > index d00b0e4..6926dcc 100644 > > --- a/hw/vfio/pci.c > > +++ b/hw/vfio/pci.c > > @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice > > *vdev, uint8_t pos) > > return 0; > > } > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1, > > + PCIHostDeviceAddress *host2) > > +{ > > + return (host1->domain == host2->domain && host1->bus == host2- > > >bus && > > + host1->slot == host2->slot); > > +} > > + > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > > + PCIHostDeviceAddress *host2) > > +{ > > + return (vfio_pci_host_slot_match(host1, host2) && > > + host1->function == host2->function); > > +} > > + > > +struct VFIODeviceFind { > > + PCIDevice *pdev; > > + bool found; > > +}; > > + > > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice > > *pdev, > > + void *opaque) > > +{ > > + DeviceState *dev = DEVICE(pdev); > > + DeviceClass *dc = DEVICE_GET_CLASS(dev); > > + VFIOPCIDevice *vdev; > > + struct VFIODeviceFind *find = opaque; > > + > > + if (find->found) { > > + return; > > + } > > + > > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > > + if (!dc->reset) { > > + goto found; > > + } > > + return; > > + } > > + vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > > + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && > > + !vdev->vbasedev.reset_works) { > > + goto found; > > + } > > + > > + return; > > +found: > > + find->pdev = pdev; > > + find->found = true; > > +} > > + > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void > > *opaque) > > +{ > > + struct VFIODeviceFind *find = opaque; > > + > > + if (find->found) { > > + return; > > + } > > + > > + if (pdev == find->pdev) { > > + find->found = true; > > + } > > +} > > + > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > > +{ > > + PCIBus *bus = vdev->pdev.bus; > > + struct vfio_pci_hot_reset_info *info = NULL; > > + struct vfio_pci_dependent_device *devices; > > + VFIOGroup *group; > > + struct VFIODeviceFind find; > > + int ret, i; > > + > > + ret = vfio_get_hot_reset_info(vdev, &info); > > + if (ret) { > > + error_report("vfio: Cannot enable AER for device %s," > > + " device does not support hot reset.", > > + vdev->vbasedev.name); > > + goto out; > > + } > > + > > + /* List all affected devices by bus reset */ > > + devices = &info->devices[0]; > > + > > + /* Verify that we have all the groups required */ > > + for (i = 0; i < info->count; i++) { > > + PCIHostDeviceAddress host; > > + VFIOPCIDevice *tmp; > > + VFIODevice *vbasedev_iter; > > + bool found = false; > > + > > + host.domain = devices[i].segment; > > + host.bus = devices[i].bus; > > + host.slot = PCI_SLOT(devices[i].devfn); > > + host.function = PCI_FUNC(devices[i].devfn); > > + > > + /* Skip the current device */ > > + if (vfio_pci_host_match(&host, &vdev->host)) { > > + continue; > > + } > > + > > + /* Ensure we own the group of the affected device */ > > + QLIST_FOREACH(group, &vfio_group_list, next) { > > + if (group->groupid == devices[i].group_id) { > > + break; > > + } > > + } > > + > > + if (!group) { > > + error_report("vfio: Cannot enable AER for device %s, " > > + "depends on group %d which is not > > owned.", > > + vdev->vbasedev.name, > > devices[i].group_id); > > + ret = -1; > > + goto out; > > + } > > + > > + /* Ensure affected devices for reset on/blow the bus */ > > + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { > > + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > > + continue; > > + } > > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, > > vbasedev); > > + if (vfio_pci_host_match(&host, &tmp->host)) { > > + PCIDevice *pci = PCI_DEVICE(tmp); > > + > > + /* > > + * For multifunction device, due to vfio driver > > signal all > > + * functions under the upstream link of the end > > point. here > > + * we validate all functions whether enable AER. > > + */ > > + if (vfio_pci_host_slot_match(&vdev->host, &tmp- > > >host) && > > + !(tmp->features & VFIO_FEATURE_ENABLE_AER)) { > > + error_report("vfio: Cannot enable AER for > > device %s, on same slot" > > + " the dependent device %s which > > does not enable AER.", > > + vdev->vbasedev.name, tmp- > > >vbasedev.name); > > + ret = -1; > > + goto out; > > + } > > + > > + find.pdev = pci; > > + find.found = false; > > + pci_for_each_device(bus, pci_bus_num(bus), > > + device_find, &find); > > + if (!find.found) { > > + error_report("vfio: Cannot enable AER for > > device %s, " > > + "the dependent device %s is not > > under the same bus", > > + vdev->vbasedev.name, tmp- > > >vbasedev.name); > > + ret = -1; > > + goto out; > > + } > > + found = true; > > + break; > > + } > > + } > > + > > + /* Ensure all affected devices assigned to VM */ > > I am puzzled. > Does not kernel enforce this already? > If not it's a security problem. > If yes why does userspace need to check this? DMA isolation and bus level isolation are separate concepts. Each function of a multi-function device can have DMA isolation, but a user needs to own all of the functions affected by a bus reset in order to perform one. An AER configuration can only be created if the user can translate a guest bus reset into a host bus reset and therefore needs to test whether it has the permissions to do so. I believe over the course of reviews we've also added some simplifying constraints around this to reduce the problem set, things like all the groups being assigned rather than just owned by the user. However, I believe the kernel is sound in how it provides security for bus resets. Thanks, Alex
On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote: > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote: > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote: > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > > > > when init vfio devices done, we should test all the devices > > > supported > > > aer whether conflict with others. For each one, get the hot reset > > > info for the affected device list. For each affected device, all > > > should attach to the VM and on/below the same bus. also, we should > > > test > > > all of the non-AER supporting vfio-pci devices on or below the > > > target > > > bus to verify they have a reset mechanism. > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > --- > > > hw/vfio/pci.c | 236 > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > > > hw/vfio/pci.h | 1 + > > > 2 files changed, 230 insertions(+), 7 deletions(-) > > > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > > index d00b0e4..6926dcc 100644 > > > --- a/hw/vfio/pci.c > > > +++ b/hw/vfio/pci.c > > > @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice > > > *vdev, uint8_t pos) > > > return 0; > > > } > > > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1, > > > + PCIHostDeviceAddress *host2) > > > +{ > > > + return (host1->domain == host2->domain && host1->bus == host2- > > > >bus && > > > + host1->slot == host2->slot); > > > +} > > > + > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > > > + PCIHostDeviceAddress *host2) > > > +{ > > > + return (vfio_pci_host_slot_match(host1, host2) && > > > + host1->function == host2->function); > > > +} > > > + > > > +struct VFIODeviceFind { > > > + PCIDevice *pdev; > > > + bool found; > > > +}; > > > + > > > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice > > > *pdev, > > > + void *opaque) > > > +{ > > > + DeviceState *dev = DEVICE(pdev); > > > + DeviceClass *dc = DEVICE_GET_CLASS(dev); > > > + VFIOPCIDevice *vdev; > > > + struct VFIODeviceFind *find = opaque; > > > + > > > + if (find->found) { > > > + return; > > > + } > > > + > > > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > > > + if (!dc->reset) { > > > + goto found; > > > + } > > > + return; > > > + } > > > + vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > > > + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && > > > + !vdev->vbasedev.reset_works) { > > > + goto found; > > > + } > > > + > > > + return; > > > +found: > > > + find->pdev = pdev; > > > + find->found = true; > > > +} > > > + > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void > > > *opaque) > > > +{ > > > + struct VFIODeviceFind *find = opaque; > > > + > > > + if (find->found) { > > > + return; > > > + } > > > + > > > + if (pdev == find->pdev) { > > > + find->found = true; > > > + } > > > +} > > > + > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > > > +{ > > > + PCIBus *bus = vdev->pdev.bus; > > > + struct vfio_pci_hot_reset_info *info = NULL; > > > + struct vfio_pci_dependent_device *devices; > > > + VFIOGroup *group; > > > + struct VFIODeviceFind find; > > > + int ret, i; > > > + > > > + ret = vfio_get_hot_reset_info(vdev, &info); > > > + if (ret) { > > > + error_report("vfio: Cannot enable AER for device %s," > > > + " device does not support hot reset.", > > > + vdev->vbasedev.name); > > > + goto out; > > > + } > > > + > > > + /* List all affected devices by bus reset */ > > > + devices = &info->devices[0]; > > > + > > > + /* Verify that we have all the groups required */ > > > + for (i = 0; i < info->count; i++) { > > > + PCIHostDeviceAddress host; > > > + VFIOPCIDevice *tmp; > > > + VFIODevice *vbasedev_iter; > > > + bool found = false; > > > + > > > + host.domain = devices[i].segment; > > > + host.bus = devices[i].bus; > > > + host.slot = PCI_SLOT(devices[i].devfn); > > > + host.function = PCI_FUNC(devices[i].devfn); > > > + > > > + /* Skip the current device */ > > > + if (vfio_pci_host_match(&host, &vdev->host)) { > > > + continue; > > > + } > > > + > > > + /* Ensure we own the group of the affected device */ > > > + QLIST_FOREACH(group, &vfio_group_list, next) { > > > + if (group->groupid == devices[i].group_id) { > > > + break; > > > + } > > > + } > > > + > > > + if (!group) { > > > + error_report("vfio: Cannot enable AER for device %s, " > > > + "depends on group %d which is not > > > owned.", > > > + vdev->vbasedev.name, > > > devices[i].group_id); > > > + ret = -1; > > > + goto out; > > > + } > > > + > > > + /* Ensure affected devices for reset on/blow the bus */ > > > + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { > > > + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > > > + continue; > > > + } > > > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, > > > vbasedev); > > > + if (vfio_pci_host_match(&host, &tmp->host)) { > > > + PCIDevice *pci = PCI_DEVICE(tmp); > > > + > > > + /* > > > + * For multifunction device, due to vfio driver > > > signal all > > > + * functions under the upstream link of the end > > > point. here > > > + * we validate all functions whether enable AER. > > > + */ > > > + if (vfio_pci_host_slot_match(&vdev->host, &tmp- > > > >host) && > > > + !(tmp->features & VFIO_FEATURE_ENABLE_AER)) { > > > + error_report("vfio: Cannot enable AER for > > > device %s, on same slot" > > > + " the dependent device %s which > > > does not enable AER.", > > > + vdev->vbasedev.name, tmp- > > > >vbasedev.name); > > > + ret = -1; > > > + goto out; > > > + } > > > + > > > + find.pdev = pci; > > > + find.found = false; > > > + pci_for_each_device(bus, pci_bus_num(bus), > > > + device_find, &find); > > > + if (!find.found) { > > > + error_report("vfio: Cannot enable AER for > > > device %s, " > > > + "the dependent device %s is not > > > under the same bus", > > > + vdev->vbasedev.name, tmp- > > > >vbasedev.name); > > > + ret = -1; > > > + goto out; > > > + } > > > + found = true; > > > + break; > > > + } > > > + } > > > + > > > + /* Ensure all affected devices assigned to VM */ > > > > I am puzzled. > > Does not kernel enforce this already? > > If not it's a security problem. > > If yes why does userspace need to check this? > > DMA isolation and bus level isolation are separate concepts. Each > function of a multi-function device can have DMA isolation, but a user > needs to own all of the functions affected by a bus reset in order to > perform one. An AER configuration can only be created if the user can > translate a guest bus reset into a host bus reset and therefore needs > to test whether it has the permissions to do so. I believe over the > course of reviews we've also added some simplifying constraints around > this to reduce the problem set, things like all the groups being > assigned rather than just owned by the user. However, I believe the > kernel is sound in how it provides security for bus resets. Thanks, > > Alex Yes, sounds good. So how about just trying to do bus reset at setup time? If kernel allows this, we know it is safe ...
On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote: > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote: > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote: > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote: > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > > > > > > when init vfio devices done, we should test all the devices > > > > supported > > > > aer whether conflict with others. For each one, get the hot > > > > reset > > > > info for the affected device list. For each affected device, > > > > all > > > > should attach to the VM and on/below the same bus. also, we > > > > should > > > > test > > > > all of the non-AER supporting vfio-pci devices on or below the > > > > target > > > > bus to verify they have a reset mechanism. > > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > > --- > > > > hw/vfio/pci.c | 236 > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > > > > hw/vfio/pci.h | 1 + > > > > 2 files changed, 230 insertions(+), 7 deletions(-) > > > > > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > > > index d00b0e4..6926dcc 100644 > > > > --- a/hw/vfio/pci.c > > > > +++ b/hw/vfio/pci.c > > > > @@ -1806,6 +1806,216 @@ static int > > > > vfio_add_std_cap(VFIOPCIDevice > > > > *vdev, uint8_t pos) > > > > return 0; > > > > } > > > > > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress > > > > *host1, > > > > + PCIHostDeviceAddress > > > > *host2) > > > > +{ > > > > + return (host1->domain == host2->domain && host1->bus == > > > > host2- > > > > > bus && > > > > + host1->slot == host2->slot); > > > > +} > > > > + > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > > > > + PCIHostDeviceAddress *host2) > > > > +{ > > > > + return (vfio_pci_host_slot_match(host1, host2) && > > > > + host1->function == host2->function); > > > > +} > > > > + > > > > +struct VFIODeviceFind { > > > > + PCIDevice *pdev; > > > > + bool found; > > > > +}; > > > > + > > > > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice > > > > *pdev, > > > > + void *opaque) > > > > +{ > > > > + DeviceState *dev = DEVICE(pdev); > > > > + DeviceClass *dc = DEVICE_GET_CLASS(dev); > > > > + VFIOPCIDevice *vdev; > > > > + struct VFIODeviceFind *find = opaque; > > > > + > > > > + if (find->found) { > > > > + return; > > > > + } > > > > + > > > > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > > > > + if (!dc->reset) { > > > > + goto found; > > > > + } > > > > + return; > > > > + } > > > > + vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > > > > + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && > > > > + !vdev->vbasedev.reset_works) { > > > > + goto found; > > > > + } > > > > + > > > > + return; > > > > +found: > > > > + find->pdev = pdev; > > > > + find->found = true; > > > > +} > > > > + > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void > > > > *opaque) > > > > +{ > > > > + struct VFIODeviceFind *find = opaque; > > > > + > > > > + if (find->found) { > > > > + return; > > > > + } > > > > + > > > > + if (pdev == find->pdev) { > > > > + find->found = true; > > > > + } > > > > +} > > > > + > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > > > > +{ > > > > + PCIBus *bus = vdev->pdev.bus; > > > > + struct vfio_pci_hot_reset_info *info = NULL; > > > > + struct vfio_pci_dependent_device *devices; > > > > + VFIOGroup *group; > > > > + struct VFIODeviceFind find; > > > > + int ret, i; > > > > + > > > > + ret = vfio_get_hot_reset_info(vdev, &info); > > > > + if (ret) { > > > > + error_report("vfio: Cannot enable AER for device %s," > > > > + " device does not support hot reset.", > > > > + vdev->vbasedev.name); > > > > + goto out; > > > > + } > > > > + > > > > + /* List all affected devices by bus reset */ > > > > + devices = &info->devices[0]; > > > > + > > > > + /* Verify that we have all the groups required */ > > > > + for (i = 0; i < info->count; i++) { > > > > + PCIHostDeviceAddress host; > > > > + VFIOPCIDevice *tmp; > > > > + VFIODevice *vbasedev_iter; > > > > + bool found = false; > > > > + > > > > + host.domain = devices[i].segment; > > > > + host.bus = devices[i].bus; > > > > + host.slot = PCI_SLOT(devices[i].devfn); > > > > + host.function = PCI_FUNC(devices[i].devfn); > > > > + > > > > + /* Skip the current device */ > > > > + if (vfio_pci_host_match(&host, &vdev->host)) { > > > > + continue; > > > > + } > > > > + > > > > + /* Ensure we own the group of the affected device */ > > > > + QLIST_FOREACH(group, &vfio_group_list, next) { > > > > + if (group->groupid == devices[i].group_id) { > > > > + break; > > > > + } > > > > + } > > > > + > > > > + if (!group) { > > > > + error_report("vfio: Cannot enable AER for device > > > > %s, " > > > > + "depends on group %d which is not > > > > owned.", > > > > + vdev->vbasedev.name, > > > > devices[i].group_id); > > > > + ret = -1; > > > > + goto out; > > > > + } > > > > + > > > > + /* Ensure affected devices for reset on/blow the bus > > > > */ > > > > + QLIST_FOREACH(vbasedev_iter, &group->device_list, > > > > next) { > > > > + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > > > > + continue; > > > > + } > > > > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, > > > > vbasedev); > > > > + if (vfio_pci_host_match(&host, &tmp->host)) { > > > > + PCIDevice *pci = PCI_DEVICE(tmp); > > > > + > > > > + /* > > > > + * For multifunction device, due to vfio > > > > driver > > > > signal all > > > > + * functions under the upstream link of the > > > > end > > > > point. here > > > > + * we validate all functions whether enable > > > > AER. > > > > + */ > > > > + if (vfio_pci_host_slot_match(&vdev->host, > > > > &tmp- > > > > > host) && > > > > + !(tmp->features & > > > > VFIO_FEATURE_ENABLE_AER)) { > > > > + error_report("vfio: Cannot enable AER for > > > > device %s, on same slot" > > > > + " the dependent device %s > > > > which > > > > does not enable AER.", > > > > + vdev->vbasedev.name, tmp- > > > > > vbasedev.name); > > > > + ret = -1; > > > > + goto out; > > > > + } > > > > + > > > > + find.pdev = pci; > > > > + find.found = false; > > > > + pci_for_each_device(bus, pci_bus_num(bus), > > > > + device_find, &find); > > > > + if (!find.found) { > > > > + error_report("vfio: Cannot enable AER for > > > > device %s, " > > > > + "the dependent device %s is > > > > not > > > > under the same bus", > > > > + vdev->vbasedev.name, tmp- > > > > > vbasedev.name); > > > > + ret = -1; > > > > + goto out; > > > > + } > > > > + found = true; > > > > + break; > > > > + } > > > > + } > > > > + > > > > + /* Ensure all affected devices assigned to VM */ > > > > > > I am puzzled. > > > Does not kernel enforce this already? > > > If not it's a security problem. > > > If yes why does userspace need to check this? > > > > DMA isolation and bus level isolation are separate concepts. Each > > function of a multi-function device can have DMA isolation, but a > > user > > needs to own all of the functions affected by a bus reset in order > > to > > perform one. An AER configuration can only be created if the user > > can > > translate a guest bus reset into a host bus reset and therefore > > needs > > to test whether it has the permissions to do so. I believe over > > the > > course of reviews we've also added some simplifying constraints > > around > > this to reduce the problem set, things like all the groups being > > assigned rather than just owned by the user. However, I believe > > the > > kernel is sound in how it provides security for bus resets. > > Thanks, > > > > Alex > > Yes, sounds good. > > So how about just trying to do bus reset at setup time? > If kernel allows this, we know it is safe ... The host may support hotplug, what's possible at setup time may not be possible when an error occurs. It's unlikely, but worth considering I think. Thanks, Alex
On Thu, Dec 24, 2015 at 11:20:26AM -0700, Alex Williamson wrote: > On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote: > > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote: > > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote: > > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote: > > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > > > > > > > > when init vfio devices done, we should test all the devices > > > > > supported > > > > > aer whether conflict with others. For each one, get the hot > > > > > reset > > > > > info for the affected device list. For each affected device, > > > > > all > > > > > should attach to the VM and on/below the same bus. also, we > > > > > should > > > > > test > > > > > all of the non-AER supporting vfio-pci devices on or below the > > > > > target > > > > > bus to verify they have a reset mechanism. > > > > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > > > --- > > > > > hw/vfio/pci.c | 236 > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > > > > > hw/vfio/pci.h | 1 + > > > > > 2 files changed, 230 insertions(+), 7 deletions(-) > > > > > > > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > > > > index d00b0e4..6926dcc 100644 > > > > > --- a/hw/vfio/pci.c > > > > > +++ b/hw/vfio/pci.c > > > > > @@ -1806,6 +1806,216 @@ static int > > > > > vfio_add_std_cap(VFIOPCIDevice > > > > > *vdev, uint8_t pos) > > > > > return 0; > > > > > } > > > > > > > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress > > > > > *host1, > > > > > + PCIHostDeviceAddress > > > > > *host2) > > > > > +{ > > > > > + return (host1->domain == host2->domain && host1->bus == > > > > > host2- > > > > > > bus && > > > > > + host1->slot == host2->slot); > > > > > +} > > > > > + > > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > > > > > + PCIHostDeviceAddress *host2) > > > > > +{ > > > > > + return (vfio_pci_host_slot_match(host1, host2) && > > > > > + host1->function == host2->function); > > > > > +} > > > > > + > > > > > +struct VFIODeviceFind { > > > > > + PCIDevice *pdev; > > > > > + bool found; > > > > > +}; > > > > > + > > > > > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice > > > > > *pdev, > > > > > + void *opaque) > > > > > +{ > > > > > + DeviceState *dev = DEVICE(pdev); > > > > > + DeviceClass *dc = DEVICE_GET_CLASS(dev); > > > > > + VFIOPCIDevice *vdev; > > > > > + struct VFIODeviceFind *find = opaque; > > > > > + > > > > > + if (find->found) { > > > > > + return; > > > > > + } > > > > > + > > > > > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > > > > > + if (!dc->reset) { > > > > > + goto found; > > > > > + } > > > > > + return; > > > > > + } > > > > > + vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > > > > > + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && > > > > > + !vdev->vbasedev.reset_works) { > > > > > + goto found; > > > > > + } > > > > > + > > > > > + return; > > > > > +found: > > > > > + find->pdev = pdev; > > > > > + find->found = true; > > > > > +} > > > > > + > > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void > > > > > *opaque) > > > > > +{ > > > > > + struct VFIODeviceFind *find = opaque; > > > > > + > > > > > + if (find->found) { > > > > > + return; > > > > > + } > > > > > + > > > > > + if (pdev == find->pdev) { > > > > > + find->found = true; > > > > > + } > > > > > +} > > > > > + > > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > > > > > +{ > > > > > + PCIBus *bus = vdev->pdev.bus; > > > > > + struct vfio_pci_hot_reset_info *info = NULL; > > > > > + struct vfio_pci_dependent_device *devices; > > > > > + VFIOGroup *group; > > > > > + struct VFIODeviceFind find; > > > > > + int ret, i; > > > > > + > > > > > + ret = vfio_get_hot_reset_info(vdev, &info); > > > > > + if (ret) { > > > > > + error_report("vfio: Cannot enable AER for device %s," > > > > > + " device does not support hot reset.", > > > > > + vdev->vbasedev.name); > > > > > + goto out; > > > > > + } > > > > > + > > > > > + /* List all affected devices by bus reset */ > > > > > + devices = &info->devices[0]; > > > > > + > > > > > + /* Verify that we have all the groups required */ > > > > > + for (i = 0; i < info->count; i++) { > > > > > + PCIHostDeviceAddress host; > > > > > + VFIOPCIDevice *tmp; > > > > > + VFIODevice *vbasedev_iter; > > > > > + bool found = false; > > > > > + > > > > > + host.domain = devices[i].segment; > > > > > + host.bus = devices[i].bus; > > > > > + host.slot = PCI_SLOT(devices[i].devfn); > > > > > + host.function = PCI_FUNC(devices[i].devfn); > > > > > + > > > > > + /* Skip the current device */ > > > > > + if (vfio_pci_host_match(&host, &vdev->host)) { > > > > > + continue; > > > > > + } > > > > > + > > > > > + /* Ensure we own the group of the affected device */ > > > > > + QLIST_FOREACH(group, &vfio_group_list, next) { > > > > > + if (group->groupid == devices[i].group_id) { > > > > > + break; > > > > > + } > > > > > + } > > > > > + > > > > > + if (!group) { > > > > > + error_report("vfio: Cannot enable AER for device > > > > > %s, " > > > > > + "depends on group %d which is not > > > > > owned.", > > > > > + vdev->vbasedev.name, > > > > > devices[i].group_id); > > > > > + ret = -1; > > > > > + goto out; > > > > > + } > > > > > + > > > > > + /* Ensure affected devices for reset on/blow the bus > > > > > */ > > > > > + QLIST_FOREACH(vbasedev_iter, &group->device_list, > > > > > next) { > > > > > + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > > > > > + continue; > > > > > + } > > > > > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, > > > > > vbasedev); > > > > > + if (vfio_pci_host_match(&host, &tmp->host)) { > > > > > + PCIDevice *pci = PCI_DEVICE(tmp); > > > > > + > > > > > + /* > > > > > + * For multifunction device, due to vfio > > > > > driver > > > > > signal all > > > > > + * functions under the upstream link of the > > > > > end > > > > > point. here > > > > > + * we validate all functions whether enable > > > > > AER. > > > > > + */ > > > > > + if (vfio_pci_host_slot_match(&vdev->host, > > > > > &tmp- > > > > > > host) && > > > > > + !(tmp->features & > > > > > VFIO_FEATURE_ENABLE_AER)) { > > > > > + error_report("vfio: Cannot enable AER for > > > > > device %s, on same slot" > > > > > + " the dependent device %s > > > > > which > > > > > does not enable AER.", > > > > > + vdev->vbasedev.name, tmp- > > > > > > vbasedev.name); > > > > > + ret = -1; > > > > > + goto out; > > > > > + } > > > > > + > > > > > + find.pdev = pci; > > > > > + find.found = false; > > > > > + pci_for_each_device(bus, pci_bus_num(bus), > > > > > + device_find, &find); > > > > > + if (!find.found) { > > > > > + error_report("vfio: Cannot enable AER for > > > > > device %s, " > > > > > + "the dependent device %s is > > > > > not > > > > > under the same bus", > > > > > + vdev->vbasedev.name, tmp- > > > > > > vbasedev.name); > > > > > + ret = -1; > > > > > + goto out; > > > > > + } > > > > > + found = true; > > > > > + break; > > > > > + } > > > > > + } > > > > > + > > > > > + /* Ensure all affected devices assigned to VM */ > > > > > > > > I am puzzled. > > > > Does not kernel enforce this already? > > > > If not it's a security problem. > > > > If yes why does userspace need to check this? > > > > > > DMA isolation and bus level isolation are separate concepts. Each > > > function of a multi-function device can have DMA isolation, but a > > > user > > > needs to own all of the functions affected by a bus reset in order > > > to > > > perform one. An AER configuration can only be created if the user > > > can > > > translate a guest bus reset into a host bus reset and therefore > > > needs > > > to test whether it has the permissions to do so. I believe over > > > the > > > course of reviews we've also added some simplifying constraints > > > around > > > this to reduce the problem set, things like all the groups being > > > assigned rather than just owned by the user. However, I believe > > > the > > > kernel is sound in how it provides security for bus resets. > > > Thanks, > > > > > > Alex > > > > Yes, sounds good. > > > > So how about just trying to do bus reset at setup time? > > If kernel allows this, we know it is safe ... > > The host may support hotplug, what's possible at setup time may not be > possible when an error occurs. How does this patch help solve this problem? > It's unlikely, but worth considering I > think. I suspect vfio will have to solve this in kernel (e.g. automatically add all new devices in the same group wrt reset). > Thanks, > > Alex
On Thu, 2015-12-24 at 20:23 +0200, Michael S. Tsirkin wrote: > On Thu, Dec 24, 2015 at 11:20:26AM -0700, Alex Williamson wrote: > > On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote: > > > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote: > > > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote: > > > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote: > > > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > > > > > > > > > > when init vfio devices done, we should test all the devices > > > > > > supported > > > > > > aer whether conflict with others. For each one, get the hot > > > > > > reset > > > > > > info for the affected device list. For each affected > > > > > > device, > > > > > > all > > > > > > should attach to the VM and on/below the same bus. also, we > > > > > > should > > > > > > test > > > > > > all of the non-AER supporting vfio-pci devices on or below > > > > > > the > > > > > > target > > > > > > bus to verify they have a reset mechanism. > > > > > > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > > > > --- > > > > > > hw/vfio/pci.c | 236 > > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > > > > > > hw/vfio/pci.h | 1 + > > > > > > 2 files changed, 230 insertions(+), 7 deletions(-) > > > > > > > > > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > > > > > index d00b0e4..6926dcc 100644 > > > > > > --- a/hw/vfio/pci.c > > > > > > +++ b/hw/vfio/pci.c > > > > > > @@ -1806,6 +1806,216 @@ static int > > > > > > vfio_add_std_cap(VFIOPCIDevice > > > > > > *vdev, uint8_t pos) > > > > > > return 0; > > > > > > } > > > > > > > > > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress > > > > > > *host1, > > > > > > + PCIHostDeviceAddress > > > > > > *host2) > > > > > > +{ > > > > > > + return (host1->domain == host2->domain && host1->bus > > > > > > == > > > > > > host2- > > > > > > > bus && > > > > > > + host1->slot == host2->slot); > > > > > > +} > > > > > > + > > > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress > > > > > > *host1, > > > > > > + PCIHostDeviceAddress > > > > > > *host2) > > > > > > +{ > > > > > > + return (vfio_pci_host_slot_match(host1, host2) && > > > > > > + host1->function == host2->function); > > > > > > +} > > > > > > + > > > > > > +struct VFIODeviceFind { > > > > > > + PCIDevice *pdev; > > > > > > + bool found; > > > > > > +}; > > > > > > + > > > > > > +static void vfio_check_device_noreset(PCIBus *bus, > > > > > > PCIDevice > > > > > > *pdev, > > > > > > + void *opaque) > > > > > > +{ > > > > > > + DeviceState *dev = DEVICE(pdev); > > > > > > + DeviceClass *dc = DEVICE_GET_CLASS(dev); > > > > > > + VFIOPCIDevice *vdev; > > > > > > + struct VFIODeviceFind *find = opaque; > > > > > > + > > > > > > + if (find->found) { > > > > > > + return; > > > > > > + } > > > > > > + > > > > > > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > > > > > > + if (!dc->reset) { > > > > > > + goto found; > > > > > > + } > > > > > > + return; > > > > > > + } > > > > > > + vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > > > > > > + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && > > > > > > + !vdev->vbasedev.reset_works) { > > > > > > + goto found; > > > > > > + } > > > > > > + > > > > > > + return; > > > > > > +found: > > > > > > + find->pdev = pdev; > > > > > > + find->found = true; > > > > > > +} > > > > > > + > > > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void > > > > > > *opaque) > > > > > > +{ > > > > > > + struct VFIODeviceFind *find = opaque; > > > > > > + > > > > > > + if (find->found) { > > > > > > + return; > > > > > > + } > > > > > > + > > > > > > + if (pdev == find->pdev) { > > > > > > + find->found = true; > > > > > > + } > > > > > > +} > > > > > > + > > > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > > > > > > +{ > > > > > > + PCIBus *bus = vdev->pdev.bus; > > > > > > + struct vfio_pci_hot_reset_info *info = NULL; > > > > > > + struct vfio_pci_dependent_device *devices; > > > > > > + VFIOGroup *group; > > > > > > + struct VFIODeviceFind find; > > > > > > + int ret, i; > > > > > > + > > > > > > + ret = vfio_get_hot_reset_info(vdev, &info); > > > > > > + if (ret) { > > > > > > + error_report("vfio: Cannot enable AER for device > > > > > > %s," > > > > > > + " device does not support hot > > > > > > reset.", > > > > > > + vdev->vbasedev.name); > > > > > > + goto out; > > > > > > + } > > > > > > + > > > > > > + /* List all affected devices by bus reset */ > > > > > > + devices = &info->devices[0]; > > > > > > + > > > > > > + /* Verify that we have all the groups required */ > > > > > > + for (i = 0; i < info->count; i++) { > > > > > > + PCIHostDeviceAddress host; > > > > > > + VFIOPCIDevice *tmp; > > > > > > + VFIODevice *vbasedev_iter; > > > > > > + bool found = false; > > > > > > + > > > > > > + host.domain = devices[i].segment; > > > > > > + host.bus = devices[i].bus; > > > > > > + host.slot = PCI_SLOT(devices[i].devfn); > > > > > > + host.function = PCI_FUNC(devices[i].devfn); > > > > > > + > > > > > > + /* Skip the current device */ > > > > > > + if (vfio_pci_host_match(&host, &vdev->host)) { > > > > > > + continue; > > > > > > + } > > > > > > + > > > > > > + /* Ensure we own the group of the affected device > > > > > > */ > > > > > > + QLIST_FOREACH(group, &vfio_group_list, next) { > > > > > > + if (group->groupid == devices[i].group_id) { > > > > > > + break; > > > > > > + } > > > > > > + } > > > > > > + > > > > > > + if (!group) { > > > > > > + error_report("vfio: Cannot enable AER for > > > > > > device > > > > > > %s, " > > > > > > + "depends on group %d which is not > > > > > > owned.", > > > > > > + vdev->vbasedev.name, > > > > > > devices[i].group_id); > > > > > > + ret = -1; > > > > > > + goto out; > > > > > > + } > > > > > > + > > > > > > + /* Ensure affected devices for reset on/blow the > > > > > > bus > > > > > > */ > > > > > > + QLIST_FOREACH(vbasedev_iter, &group->device_list, > > > > > > next) { > > > > > > + if (vbasedev_iter->type != > > > > > > VFIO_DEVICE_TYPE_PCI) { > > > > > > + continue; > > > > > > + } > > > > > > + tmp = container_of(vbasedev_iter, > > > > > > VFIOPCIDevice, > > > > > > vbasedev); > > > > > > + if (vfio_pci_host_match(&host, &tmp->host)) { > > > > > > + PCIDevice *pci = PCI_DEVICE(tmp); > > > > > > + > > > > > > + /* > > > > > > + * For multifunction device, due to vfio > > > > > > driver > > > > > > signal all > > > > > > + * functions under the upstream link of > > > > > > the > > > > > > end > > > > > > point. here > > > > > > + * we validate all functions whether > > > > > > enable > > > > > > AER. > > > > > > + */ > > > > > > + if (vfio_pci_host_slot_match(&vdev->host, > > > > > > &tmp- > > > > > > > host) && > > > > > > + !(tmp->features & > > > > > > VFIO_FEATURE_ENABLE_AER)) { > > > > > > + error_report("vfio: Cannot enable AER > > > > > > for > > > > > > device %s, on same slot" > > > > > > + " the dependent device %s > > > > > > which > > > > > > does not enable AER.", > > > > > > + vdev->vbasedev.name, tmp- > > > > > > > vbasedev.name); > > > > > > + ret = -1; > > > > > > + goto out; > > > > > > + } > > > > > > + > > > > > > + find.pdev = pci; > > > > > > + find.found = false; > > > > > > + pci_for_each_device(bus, pci_bus_num(bus), > > > > > > + device_find, &find); > > > > > > + if (!find.found) { > > > > > > + error_report("vfio: Cannot enable AER > > > > > > for > > > > > > device %s, " > > > > > > + "the dependent device %s > > > > > > is > > > > > > not > > > > > > under the same bus", > > > > > > + vdev->vbasedev.name, tmp- > > > > > > > vbasedev.name); > > > > > > + ret = -1; > > > > > > + goto out; > > > > > > + } > > > > > > + found = true; > > > > > > + break; > > > > > > + } > > > > > > + } > > > > > > + > > > > > > + /* Ensure all affected devices assigned to VM */ > > > > > > > > > > I am puzzled. > > > > > Does not kernel enforce this already? > > > > > If not it's a security problem. > > > > > If yes why does userspace need to check this? > > > > > > > > DMA isolation and bus level isolation are separate concepts. > > > > Each > > > > function of a multi-function device can have DMA isolation, but > > > > a > > > > user > > > > needs to own all of the functions affected by a bus reset in > > > > order > > > > to > > > > perform one. An AER configuration can only be created if the > > > > user > > > > can > > > > translate a guest bus reset into a host bus reset and therefore > > > > needs > > > > to test whether it has the permissions to do so. I believe > > > > over > > > > the > > > > course of reviews we've also added some simplifying constraints > > > > around > > > > this to reduce the problem set, things like all the groups > > > > being > > > > assigned rather than just owned by the user. However, I > > > > believe > > > > the > > > > kernel is sound in how it provides security for bus resets. > > > > Thanks, > > > > > > > > Alex > > > > > > Yes, sounds good. > > > > > > So how about just trying to do bus reset at setup time? > > > If kernel allows this, we know it is safe ... > > > > The host may support hotplug, what's possible at setup time may not > > be > > possible when an error occurs. > > How does this patch help solve this problem? I believe there's a patch in this series that re-tests on the occurrence of an error, before injecting the AER into the guest. > > It's unlikely, but worth considering I > > think. > > I suspect vfio will have to solve this in kernel > (e.g. automatically add all new devices in the same group > wrt reset). Nope, the user simply loses their ability to reset the bus if they don't own all the groups at the time they attempt to do a bus reset. Mixing bus isolation and DMA isolation would cause a mess of groups.
On Thu, Dec 24, 2015 at 11:41:15AM -0700, Alex Williamson wrote: > On Thu, 2015-12-24 at 20:23 +0200, Michael S. Tsirkin wrote: > > On Thu, Dec 24, 2015 at 11:20:26AM -0700, Alex Williamson wrote: > > > On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote: > > > > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote: > > > > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote: > > > > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote: > > > > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > > > > > > > > > > > > when init vfio devices done, we should test all the devices > > > > > > > supported > > > > > > > aer whether conflict with others. For each one, get the hot > > > > > > > reset > > > > > > > info for the affected device list. For each affected > > > > > > > device, > > > > > > > all > > > > > > > should attach to the VM and on/below the same bus. also, we > > > > > > > should > > > > > > > test > > > > > > > all of the non-AER supporting vfio-pci devices on or below > > > > > > > the > > > > > > > target > > > > > > > bus to verify they have a reset mechanism. > > > > > > > > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com> > > > > > > > --- > > > > > > > hw/vfio/pci.c | 236 > > > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > > > > > > > hw/vfio/pci.h | 1 + > > > > > > > 2 files changed, 230 insertions(+), 7 deletions(-) > > > > > > > > > > > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > > > > > > index d00b0e4..6926dcc 100644 > > > > > > > --- a/hw/vfio/pci.c > > > > > > > +++ b/hw/vfio/pci.c > > > > > > > @@ -1806,6 +1806,216 @@ static int > > > > > > > vfio_add_std_cap(VFIOPCIDevice > > > > > > > *vdev, uint8_t pos) > > > > > > > return 0; > > > > > > > } > > > > > > > > > > > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress > > > > > > > *host1, > > > > > > > + PCIHostDeviceAddress > > > > > > > *host2) > > > > > > > +{ > > > > > > > + return (host1->domain == host2->domain && host1->bus > > > > > > > == > > > > > > > host2- > > > > > > > > bus && > > > > > > > + host1->slot == host2->slot); > > > > > > > +} > > > > > > > + > > > > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress > > > > > > > *host1, > > > > > > > + PCIHostDeviceAddress > > > > > > > *host2) > > > > > > > +{ > > > > > > > + return (vfio_pci_host_slot_match(host1, host2) && > > > > > > > + host1->function == host2->function); > > > > > > > +} > > > > > > > + > > > > > > > +struct VFIODeviceFind { > > > > > > > + PCIDevice *pdev; > > > > > > > + bool found; > > > > > > > +}; > > > > > > > + > > > > > > > +static void vfio_check_device_noreset(PCIBus *bus, > > > > > > > PCIDevice > > > > > > > *pdev, > > > > > > > + void *opaque) > > > > > > > +{ > > > > > > > + DeviceState *dev = DEVICE(pdev); > > > > > > > + DeviceClass *dc = DEVICE_GET_CLASS(dev); > > > > > > > + VFIOPCIDevice *vdev; > > > > > > > + struct VFIODeviceFind *find = opaque; > > > > > > > + > > > > > > > + if (find->found) { > > > > > > > + return; > > > > > > > + } > > > > > > > + > > > > > > > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > > > > > > > + if (!dc->reset) { > > > > > > > + goto found; > > > > > > > + } > > > > > > > + return; > > > > > > > + } > > > > > > > + vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > > > > > > > + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && > > > > > > > + !vdev->vbasedev.reset_works) { > > > > > > > + goto found; > > > > > > > + } > > > > > > > + > > > > > > > + return; > > > > > > > +found: > > > > > > > + find->pdev = pdev; > > > > > > > + find->found = true; > > > > > > > +} > > > > > > > + > > > > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void > > > > > > > *opaque) > > > > > > > +{ > > > > > > > + struct VFIODeviceFind *find = opaque; > > > > > > > + > > > > > > > + if (find->found) { > > > > > > > + return; > > > > > > > + } > > > > > > > + > > > > > > > + if (pdev == find->pdev) { > > > > > > > + find->found = true; > > > > > > > + } > > > > > > > +} > > > > > > > + > > > > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > > > > > > > +{ > > > > > > > + PCIBus *bus = vdev->pdev.bus; > > > > > > > + struct vfio_pci_hot_reset_info *info = NULL; > > > > > > > + struct vfio_pci_dependent_device *devices; > > > > > > > + VFIOGroup *group; > > > > > > > + struct VFIODeviceFind find; > > > > > > > + int ret, i; > > > > > > > + > > > > > > > + ret = vfio_get_hot_reset_info(vdev, &info); > > > > > > > + if (ret) { > > > > > > > + error_report("vfio: Cannot enable AER for device > > > > > > > %s," > > > > > > > + " device does not support hot > > > > > > > reset.", > > > > > > > + vdev->vbasedev.name); > > > > > > > + goto out; > > > > > > > + } > > > > > > > + > > > > > > > + /* List all affected devices by bus reset */ > > > > > > > + devices = &info->devices[0]; > > > > > > > + > > > > > > > + /* Verify that we have all the groups required */ > > > > > > > + for (i = 0; i < info->count; i++) { > > > > > > > + PCIHostDeviceAddress host; > > > > > > > + VFIOPCIDevice *tmp; > > > > > > > + VFIODevice *vbasedev_iter; > > > > > > > + bool found = false; > > > > > > > + > > > > > > > + host.domain = devices[i].segment; > > > > > > > + host.bus = devices[i].bus; > > > > > > > + host.slot = PCI_SLOT(devices[i].devfn); > > > > > > > + host.function = PCI_FUNC(devices[i].devfn); > > > > > > > + > > > > > > > + /* Skip the current device */ > > > > > > > + if (vfio_pci_host_match(&host, &vdev->host)) { > > > > > > > + continue; > > > > > > > + } > > > > > > > + > > > > > > > + /* Ensure we own the group of the affected device > > > > > > > */ > > > > > > > + QLIST_FOREACH(group, &vfio_group_list, next) { > > > > > > > + if (group->groupid == devices[i].group_id) { > > > > > > > + break; > > > > > > > + } > > > > > > > + } > > > > > > > + > > > > > > > + if (!group) { > > > > > > > + error_report("vfio: Cannot enable AER for > > > > > > > device > > > > > > > %s, " > > > > > > > + "depends on group %d which is not > > > > > > > owned.", > > > > > > > + vdev->vbasedev.name, > > > > > > > devices[i].group_id); > > > > > > > + ret = -1; > > > > > > > + goto out; > > > > > > > + } > > > > > > > + > > > > > > > + /* Ensure affected devices for reset on/blow the > > > > > > > bus > > > > > > > */ > > > > > > > + QLIST_FOREACH(vbasedev_iter, &group->device_list, > > > > > > > next) { > > > > > > > + if (vbasedev_iter->type != > > > > > > > VFIO_DEVICE_TYPE_PCI) { > > > > > > > + continue; > > > > > > > + } > > > > > > > + tmp = container_of(vbasedev_iter, > > > > > > > VFIOPCIDevice, > > > > > > > vbasedev); > > > > > > > + if (vfio_pci_host_match(&host, &tmp->host)) { > > > > > > > + PCIDevice *pci = PCI_DEVICE(tmp); > > > > > > > + > > > > > > > + /* > > > > > > > + * For multifunction device, due to vfio > > > > > > > driver > > > > > > > signal all > > > > > > > + * functions under the upstream link of > > > > > > > the > > > > > > > end > > > > > > > point. here > > > > > > > + * we validate all functions whether > > > > > > > enable > > > > > > > AER. > > > > > > > + */ > > > > > > > + if (vfio_pci_host_slot_match(&vdev->host, > > > > > > > &tmp- > > > > > > > > host) && > > > > > > > + !(tmp->features & > > > > > > > VFIO_FEATURE_ENABLE_AER)) { > > > > > > > + error_report("vfio: Cannot enable AER > > > > > > > for > > > > > > > device %s, on same slot" > > > > > > > + " the dependent device %s > > > > > > > which > > > > > > > does not enable AER.", > > > > > > > + vdev->vbasedev.name, tmp- > > > > > > > > vbasedev.name); > > > > > > > + ret = -1; > > > > > > > + goto out; > > > > > > > + } > > > > > > > + > > > > > > > + find.pdev = pci; > > > > > > > + find.found = false; > > > > > > > + pci_for_each_device(bus, pci_bus_num(bus), > > > > > > > + device_find, &find); > > > > > > > + if (!find.found) { > > > > > > > + error_report("vfio: Cannot enable AER > > > > > > > for > > > > > > > device %s, " > > > > > > > + "the dependent device %s > > > > > > > is > > > > > > > not > > > > > > > under the same bus", > > > > > > > + vdev->vbasedev.name, tmp- > > > > > > > > vbasedev.name); > > > > > > > + ret = -1; > > > > > > > + goto out; > > > > > > > + } > > > > > > > + found = true; > > > > > > > + break; > > > > > > > + } > > > > > > > + } > > > > > > > + > > > > > > > + /* Ensure all affected devices assigned to VM */ > > > > > > > > > > > > I am puzzled. > > > > > > Does not kernel enforce this already? > > > > > > If not it's a security problem. > > > > > > If yes why does userspace need to check this? > > > > > > > > > > DMA isolation and bus level isolation are separate concepts. > > > > > Each > > > > > function of a multi-function device can have DMA isolation, but > > > > > a > > > > > user > > > > > needs to own all of the functions affected by a bus reset in > > > > > order > > > > > to > > > > > perform one. An AER configuration can only be created if the > > > > > user > > > > > can > > > > > translate a guest bus reset into a host bus reset and therefore > > > > > needs > > > > > to test whether it has the permissions to do so. I believe > > > > > over > > > > > the > > > > > course of reviews we've also added some simplifying constraints > > > > > around > > > > > this to reduce the problem set, things like all the groups > > > > > being > > > > > assigned rather than just owned by the user. However, I > > > > > believe > > > > > the > > > > > kernel is sound in how it provides security for bus resets. > > > > > Thanks, > > > > > > > > > > Alex > > > > > > > > Yes, sounds good. > > > > > > > > So how about just trying to do bus reset at setup time? > > > > If kernel allows this, we know it is safe ... > > > > > > The host may support hotplug, what's possible at setup time may not > > > be > > > possible when an error occurs. > > > > How does this patch help solve this problem? > > I believe there's a patch in this series that re-tests on the > occurrence of an error, before injecting the AER into the guest. Doesn't seem robust. What if hotplug happens right after error is injected? > > > It's unlikely, but worth considering I > > > think. > > > > I suspect vfio will have to solve this in kernel > > (e.g. automatically add all new devices in the same group > > wrt reset). > > Nope, the user simply loses their ability to reset the bus if they > don't own all the groups at the time they attempt to do a bus reset. Hmm, this is sub-optimal. Assume I hot-plug a device behind a bus. I fully intend to pass it through to a VM where all other devices are but before I manage to do this, an error triggers. > Mixing bus isolation and DMA isolation would cause a mess of groups. Not sure how what I said implies this. I merely suggested that if vfio takes over bus reset it should take over handling hotplug as well, so devices added on this bus are automatically pevented from being used by anyone except the same VM, making it safe to reset them.
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index d00b0e4..6926dcc 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) return 0; } +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1, + PCIHostDeviceAddress *host2) +{ + return (host1->domain == host2->domain && host1->bus == host2->bus && + host1->slot == host2->slot); +} + +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, + PCIHostDeviceAddress *host2) +{ + return (vfio_pci_host_slot_match(host1, host2) && + host1->function == host2->function); +} + +struct VFIODeviceFind { + PCIDevice *pdev; + bool found; +}; + +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + DeviceState *dev = DEVICE(pdev); + DeviceClass *dc = DEVICE_GET_CLASS(dev); + VFIOPCIDevice *vdev; + struct VFIODeviceFind *find = opaque; + + if (find->found) { + return; + } + + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { + if (!dc->reset) { + goto found; + } + return; + } + vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && + !vdev->vbasedev.reset_works) { + goto found; + } + + return; +found: + find->pdev = pdev; + find->found = true; +} + +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque) +{ + struct VFIODeviceFind *find = opaque; + + if (find->found) { + return; + } + + if (pdev == find->pdev) { + find->found = true; + } +} + +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) +{ + PCIBus *bus = vdev->pdev.bus; + struct vfio_pci_hot_reset_info *info = NULL; + struct vfio_pci_dependent_device *devices; + VFIOGroup *group; + struct VFIODeviceFind find; + int ret, i; + + ret = vfio_get_hot_reset_info(vdev, &info); + if (ret) { + error_report("vfio: Cannot enable AER for device %s," + " device does not support hot reset.", + vdev->vbasedev.name); + goto out; + } + + /* List all affected devices by bus reset */ + devices = &info->devices[0]; + + /* Verify that we have all the groups required */ + for (i = 0; i < info->count; i++) { + PCIHostDeviceAddress host; + VFIOPCIDevice *tmp; + VFIODevice *vbasedev_iter; + bool found = false; + + host.domain = devices[i].segment; + host.bus = devices[i].bus; + host.slot = PCI_SLOT(devices[i].devfn); + host.function = PCI_FUNC(devices[i].devfn); + + /* Skip the current device */ + if (vfio_pci_host_match(&host, &vdev->host)) { + continue; + } + + /* Ensure we own the group of the affected device */ + QLIST_FOREACH(group, &vfio_group_list, next) { + if (group->groupid == devices[i].group_id) { + break; + } + } + + if (!group) { + error_report("vfio: Cannot enable AER for device %s, " + "depends on group %d which is not owned.", + vdev->vbasedev.name, devices[i].group_id); + ret = -1; + goto out; + } + + /* Ensure affected devices for reset on/blow the bus */ + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { + continue; + } + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); + if (vfio_pci_host_match(&host, &tmp->host)) { + PCIDevice *pci = PCI_DEVICE(tmp); + + /* + * For multifunction device, due to vfio driver signal all + * functions under the upstream link of the end point. here + * we validate all functions whether enable AER. + */ + if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) && + !(tmp->features & VFIO_FEATURE_ENABLE_AER)) { + error_report("vfio: Cannot enable AER for device %s, on same slot" + " the dependent device %s which does not enable AER.", + vdev->vbasedev.name, tmp->vbasedev.name); + ret = -1; + goto out; + } + + find.pdev = pci; + find.found = false; + pci_for_each_device(bus, pci_bus_num(bus), + device_find, &find); + if (!find.found) { + error_report("vfio: Cannot enable AER for device %s, " + "the dependent device %s is not under the same bus", + vdev->vbasedev.name, tmp->vbasedev.name); + ret = -1; + goto out; + } + found = true; + break; + } + } + + /* Ensure all affected devices assigned to VM */ + if (!found) { + error_report("vfio: Cannot enable AER for device %s, " + "the dependent device %04x:%02x:%02x.%x " + "is not assigned to VM.", + vdev->vbasedev.name, host.domain, host.bus, + host.slot, host.function); + ret = -1; + goto out; + } + } + + /* + * Check the all pci devices on or below the target bus + * have a reset mechanism at least. + */ + find.pdev = NULL; + find.found = false; + pci_for_each_device(bus, pci_bus_num(bus), + vfio_check_device_noreset, &find); + if (find.found) { + error_report("vfio: Cannot enable AER for device %s, " + "the affected device %s does not have a reset mechanism.", + vdev->vbasedev.name, find.pdev->name); + ret = -1; + goto out; + } + + ret = 0; +out: + g_free(info); + return ret; +} + +static int vfio_check_devices_host_bus_reset(void) +{ + VFIOGroup *group; + VFIODevice *vbasedev; + VFIOPCIDevice *vdev; + + /* Check All vfio-pci devices if have bus reset capability */ + QLIST_FOREACH(group, &vfio_group_list, next) { + QLIST_FOREACH(vbasedev, &group->device_list, next) { + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { + continue; + } + vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); + if ((vdev->features & VFIO_FEATURE_ENABLE_AER) && + vfio_check_host_bus_reset(vdev)) { + return -1; + } + } + } + + return 0; +} + static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver, int pos, uint16_t size) { @@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) vfio_intx_enable(vdev); } -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, - PCIHostDeviceAddress *host2) -{ - return (host1->domain == host2->domain && host1->bus == host2->bus && - host1->slot == host2->slot && host1->function == host2->function); -} - static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) { VFIOGroup *group; @@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) vdev->req_enabled = false; } +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused) +{ + int ret; + + ret = vfio_check_devices_host_bus_reset(); + if (ret) { + exit(1); + } +} + +static Notifier machine_notifier = { + .notify = vfio_pci_machine_done_notify, +}; + static int vfio_initfn(PCIDevice *pdev) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); @@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = { static void register_vfio_pci_dev_type(void) { type_register_static(&vfio_pci_dev_info); + /* + * Register notifier when machine init is done, since we need + * check the configration manner after all vfio device are inited. + */ + qemu_add_machine_init_done_notifier(&machine_notifier); } type_init(register_vfio_pci_dev_type) diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 48c1f69..59ae194 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -15,6 +15,7 @@ #include "qemu-common.h" #include "exec/memory.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" #include "hw/pci/pci_bridge.h" #include "hw/vfio/vfio-common.h" #include "qemu/event_notifier.h"