Message ID | 20220705062235.2276125-1-aik@ozlabs.ru (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [kernel] powerpc/iommu: Add simple iommu_ops to report capabilities | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/github-powerpc_selftests | success | Successfully ran 10 jobs. |
snowpatch_ozlabs/github-powerpc_ppctests | success | Successfully ran 10 jobs. |
snowpatch_ozlabs/github-powerpc_kernel_qemu | fail | kernel (g5_defconfig, korg-5.5.0, /linux/arch/powerpc/configs/g5-qemu.config) failed at step build. |
snowpatch_ozlabs/github-powerpc_sparse | success | Successfully ran 4 jobs. |
snowpatch_ozlabs/github-powerpc_clang | success | Successfully ran 7 jobs. |
On 2022-07-05 07:22, Alexey Kardashevskiy wrote: > Historically PPC64 managed to avoid using iommu_ops. The VFIO driver > uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in > the Type1 VFIO driver. Recent development though has added a coherency > capability check to the generic part of VFIO and essentially disabled > VFIO on PPC64. > > This adds a simple iommu_ops stub which reports support for cache > coherency. Because bus_set_iommu() triggers IOMMU probing of PCI devices, > this provides minimum code for the probing to not crash. No more bus_set_iommu() please - I'll be sending a new version of this series very soon: https://lore.kernel.org/linux-iommu/cover.1650890638.git.robin.murphy@arm.com/ Cheers, Robin. > The previous discussion is here: > https://patchwork.ozlabs.org/project/kvm-ppc/patch/20220701061751.1955857-1-aik@ozlabs.ru/ > > Fixes: e8ae0e140c05 ("vfio: Require that devices support DMA cache coherence") > Fixes: 70693f470848 ("vfio: Set DMA ownership for VFIO devices") > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > --- > > I have not looked into the domains for ages, what is missing here? With this > on top of 5.19-rc1 VFIO works again on my POWER9 box. Thanks, > > --- > arch/powerpc/include/asm/iommu.h | 2 + > arch/powerpc/kernel/iommu.c | 70 ++++++++++++++++++++++++++++++++ > arch/powerpc/kernel/pci_64.c | 3 ++ > 3 files changed, 75 insertions(+) > > diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h > index 7e29c73e3dd4..4bdae0ee29d0 100644 > --- a/arch/powerpc/include/asm/iommu.h > +++ b/arch/powerpc/include/asm/iommu.h > @@ -215,6 +215,8 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, > enum dma_data_direction *direction); > extern void iommu_tce_kill(struct iommu_table *tbl, > unsigned long entry, unsigned long pages); > + > +extern const struct iommu_ops spapr_tce_iommu_ops; > #else > static inline void iommu_register_group(struct iommu_table_group *table_group, > int pci_domain_number, > diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c > index 7e56ddb3e0b9..2205b448f7d5 100644 > --- a/arch/powerpc/kernel/iommu.c > +++ b/arch/powerpc/kernel/iommu.c > @@ -1176,4 +1176,74 @@ void iommu_del_device(struct device *dev) > iommu_group_remove_device(dev); > } > EXPORT_SYMBOL_GPL(iommu_del_device); > + > +/* > + * A simple iommu_ops to allow less cruft in generic VFIO code. > + */ > +static bool spapr_tce_iommu_capable(enum iommu_cap cap) > +{ > + switch (cap) { > + case IOMMU_CAP_CACHE_COHERENCY: > + return true; > + default: > + break; > + } > + > + return false; > +} > + > +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) > +{ > + struct iommu_domain *domain = kzalloc(sizeof(*domain), GFP_KERNEL); > + > + if (!domain) > + return NULL; > + > + domain->geometry.aperture_start = 0; > + domain->geometry.aperture_end = ~0ULL; > + domain->geometry.force_aperture = true; > + > + return domain; > +} > + > +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) > +{ > + struct iommu_device *iommu_dev = kzalloc(sizeof(struct iommu_device), GFP_KERNEL); > + > + iommu_dev->dev = dev; > + iommu_dev->ops = &spapr_tce_iommu_ops; > + > + return iommu_dev; > +} > + > +static void spapr_tce_iommu_release_device(struct device *dev) > +{ > +} > + > +static int spapr_tce_iommu_attach_dev(struct iommu_domain *dom, > + struct device *dev) > +{ > + return 0; > +} > + > +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) > +{ > + struct iommu_group *grp = dev->iommu_group; > + > + if (!grp) > + grp = ERR_PTR(-ENODEV); > + return grp; > +} > + > +const struct iommu_ops spapr_tce_iommu_ops = { > + .capable = spapr_tce_iommu_capable, > + .domain_alloc = spapr_tce_iommu_domain_alloc, > + .probe_device = spapr_tce_iommu_probe_device, > + .release_device = spapr_tce_iommu_release_device, > + .device_group = spapr_tce_iommu_device_group, > + .default_domain_ops = &(const struct iommu_domain_ops) { > + .attach_dev = spapr_tce_iommu_attach_dev, > + } > +}; > + > #endif /* CONFIG_IOMMU_API */ > diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c > index 19b03ddf5631..04bc0c52e45c 100644 > --- a/arch/powerpc/kernel/pci_64.c > +++ b/arch/powerpc/kernel/pci_64.c > @@ -20,6 +20,7 @@ > #include <linux/irq.h> > #include <linux/vmalloc.h> > #include <linux/of.h> > +#include <linux/iommu.h> > > #include <asm/processor.h> > #include <asm/io.h> > @@ -27,6 +28,7 @@ > #include <asm/byteorder.h> > #include <asm/machdep.h> > #include <asm/ppc-pci.h> > +#include <asm/iommu.h> > > /* pci_io_base -- the base address from which io bars are offsets. > * This is the lowest I/O base address (so bar values are always positive), > @@ -69,6 +71,7 @@ static int __init pcibios_init(void) > ppc_md.pcibios_fixup(); > > printk(KERN_DEBUG "PCI: Probing PCI hardware done\n"); > + bus_set_iommu(&pci_bus_type, &spapr_tce_iommu_ops); > > return 0; > }
On Tue, Jul 05, 2022 at 04:22:35PM +1000, Alexey Kardashevskiy wrote: > I have not looked into the domains for ages, what is missing here? With this > on top of 5.19-rc1 VFIO works again on my POWER9 box. Thanks, Does this solve all the problems or just coherency? It seems like it should solve everything now as there will be a IOMMU_DOMAIN_BLOCKED and the ref logic will succeed to assign it? > +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) > +{ > + struct iommu_domain *domain = kzalloc(sizeof(*domain), GFP_KERNEL); > + > + if (!domain) > + return NULL; This should only succeed if type is IOMMU_DOMAIN_BLOCKED > +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) > +{ > + struct iommu_group *grp = dev->iommu_group; > + > + if (!grp) > + grp = ERR_PTR(-ENODEV); It looks like this should just always fail since the code code already checks iommu_group before calling this? (Arguably ppc should be refactored to use the normal probe_device and device_group ops to create groups, but that doesn't seem critical for this. Thanks Jason
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 7e29c73e3dd4..4bdae0ee29d0 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -215,6 +215,8 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, enum dma_data_direction *direction); extern void iommu_tce_kill(struct iommu_table *tbl, unsigned long entry, unsigned long pages); + +extern const struct iommu_ops spapr_tce_iommu_ops; #else static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 7e56ddb3e0b9..2205b448f7d5 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1176,4 +1176,74 @@ void iommu_del_device(struct device *dev) iommu_group_remove_device(dev); } EXPORT_SYMBOL_GPL(iommu_del_device); + +/* + * A simple iommu_ops to allow less cruft in generic VFIO code. + */ +static bool spapr_tce_iommu_capable(enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + break; + } + + return false; +} + +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) +{ + struct iommu_domain *domain = kzalloc(sizeof(*domain), GFP_KERNEL); + + if (!domain) + return NULL; + + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = ~0ULL; + domain->geometry.force_aperture = true; + + return domain; +} + +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) +{ + struct iommu_device *iommu_dev = kzalloc(sizeof(struct iommu_device), GFP_KERNEL); + + iommu_dev->dev = dev; + iommu_dev->ops = &spapr_tce_iommu_ops; + + return iommu_dev; +} + +static void spapr_tce_iommu_release_device(struct device *dev) +{ +} + +static int spapr_tce_iommu_attach_dev(struct iommu_domain *dom, + struct device *dev) +{ + return 0; +} + +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) +{ + struct iommu_group *grp = dev->iommu_group; + + if (!grp) + grp = ERR_PTR(-ENODEV); + return grp; +} + +const struct iommu_ops spapr_tce_iommu_ops = { + .capable = spapr_tce_iommu_capable, + .domain_alloc = spapr_tce_iommu_domain_alloc, + .probe_device = spapr_tce_iommu_probe_device, + .release_device = spapr_tce_iommu_release_device, + .device_group = spapr_tce_iommu_device_group, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = spapr_tce_iommu_attach_dev, + } +}; + #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 19b03ddf5631..04bc0c52e45c 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -20,6 +20,7 @@ #include <linux/irq.h> #include <linux/vmalloc.h> #include <linux/of.h> +#include <linux/iommu.h> #include <asm/processor.h> #include <asm/io.h> @@ -27,6 +28,7 @@ #include <asm/byteorder.h> #include <asm/machdep.h> #include <asm/ppc-pci.h> +#include <asm/iommu.h> /* pci_io_base -- the base address from which io bars are offsets. * This is the lowest I/O base address (so bar values are always positive), @@ -69,6 +71,7 @@ static int __init pcibios_init(void) ppc_md.pcibios_fixup(); printk(KERN_DEBUG "PCI: Probing PCI hardware done\n"); + bus_set_iommu(&pci_bus_type, &spapr_tce_iommu_ops); return 0; }
Historically PPC64 managed to avoid using iommu_ops. The VFIO driver uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in the Type1 VFIO driver. Recent development though has added a coherency capability check to the generic part of VFIO and essentially disabled VFIO on PPC64. This adds a simple iommu_ops stub which reports support for cache coherency. Because bus_set_iommu() triggers IOMMU probing of PCI devices, this provides minimum code for the probing to not crash. The previous discussion is here: https://patchwork.ozlabs.org/project/kvm-ppc/patch/20220701061751.1955857-1-aik@ozlabs.ru/ Fixes: e8ae0e140c05 ("vfio: Require that devices support DMA cache coherence") Fixes: 70693f470848 ("vfio: Set DMA ownership for VFIO devices") Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> --- I have not looked into the domains for ages, what is missing here? With this on top of 5.19-rc1 VFIO works again on my POWER9 box. Thanks, --- arch/powerpc/include/asm/iommu.h | 2 + arch/powerpc/kernel/iommu.c | 70 ++++++++++++++++++++++++++++++++ arch/powerpc/kernel/pci_64.c | 3 ++ 3 files changed, 75 insertions(+)