Message ID | 1367825157-27231-6-git-send-email-aik@ozlabs.ru |
---|---|
State | New, archived |
Headers | show |
On Mon, May 06, 2013 at 05:25:56PM +1000, Alexey Kardashevskiy wrote: > This allows the host kernel to handle H_PUT_TCE, H_PUT_TCE_INDIRECT > and H_STUFF_TCE requests without passing them to QEMU, which should > save time on switching to QEMU and back. > > Both real and virtual modes are supported - whenever the kernel > fails to handle a TCE request in real mode, it passes it to the virtual mode. > If the virtual mode handlers fail, then the request is passed > to the user mode, for example, to QEMU. > > This adds a new KVM_CAP_SPAPR_TCE_IOMMU ioctl to associate > a virtual PCI bus ID (LIOBN) with an IOMMU group, which enables > in-kernel handling of IOMMU map/unmap. > > This adds a special case for huge pages (16MB). The reference > counting cannot be easily done for such pages in real mode (when > MMU is off) so we added a list of huge pages. It is populated in > virtual mode and get_page is called just once per huge page. > Real mode handlers check if the requested page is huge and in the list, > then no reference counting is done, otherwise an exit to virtual mode > happens. The list is released at KVM exit. At the moment the fastest > card available for tests uses up to 9 huge pages so walking through this > list is not very expensive. However this can change and we may want > to optimize this. > > This also adds the virt_only parameter to the KVM module > for debug and performance check purposes. > > Tests show that this patch increases transmission speed from 220MB/s > to 750..1020MB/s on 10Gb network (Chelsio CXGB3 10Gb ethernet card). 
> > Cc: David Gibson <david@gibson.dropbear.id.au> > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > Signed-off-by: Paul Mackerras <paulus@samba.org> > --- > Documentation/virtual/kvm/api.txt | 28 ++++ > arch/powerpc/include/asm/kvm_host.h | 2 + > arch/powerpc/include/asm/kvm_ppc.h | 2 + > arch/powerpc/include/uapi/asm/kvm.h | 7 + > arch/powerpc/kvm/book3s_64_vio.c | 242 ++++++++++++++++++++++++++++++++++- > arch/powerpc/kvm/book3s_64_vio_hv.c | 192 +++++++++++++++++++++++++++ > arch/powerpc/kvm/powerpc.c | 12 ++ > include/uapi/linux/kvm.h | 2 + > 8 files changed, 485 insertions(+), 2 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index f621cd6..2039767 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -2127,6 +2127,34 @@ written, then `n_invalid' invalid entries, invalidating any previously > valid entries found. > > > +4.79 KVM_CREATE_SPAPR_TCE_IOMMU > + > +Capability: KVM_CAP_SPAPR_TCE_IOMMU > +Architectures: powerpc > +Type: vm ioctl > +Parameters: struct kvm_create_spapr_tce_iommu (in) > +Returns: 0 on success, -1 on error > + > +This creates a link between IOMMU group and a hardware TCE (translation > +control entry) table. This link lets the host kernel know what IOMMU > +group (i.e. TCE table) to use for the LIOBN number passed with > +H_PUT_TCE, H_PUT_TCE_INDIRECT, H_STUFF_TCE hypercalls. > + > +/* for KVM_CAP_SPAPR_TCE_IOMMU */ > +struct kvm_create_spapr_tce_iommu { > + __u64 liobn; > + __u32 iommu_id; Wouldn't it be more in keeping > + __u32 flags; > +}; > + > +No flag is supported at the moment. > + > +When the guest issues TCE call on a liobn for which a TCE table has been > +registered, the kernel will handle it in real mode, updating the hardware > +TCE table. TCE table calls for other liobns will cause a vm exit and must > +be handled by userspace. > + > + > 5. 
The kvm_run structure > ------------------------ > > diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h > index 36ceb0d..2b70cbc 100644 > --- a/arch/powerpc/include/asm/kvm_host.h > +++ b/arch/powerpc/include/asm/kvm_host.h > @@ -178,6 +178,8 @@ struct kvmppc_spapr_tce_table { > struct kvm *kvm; > u64 liobn; > u32 window_size; > + bool virtmode_only; I see this is now initialized from the global parameter, but I think it would be better to just check the global (debug) parameter directly, rather than duplicating it here. > + struct iommu_group *grp; /* used for IOMMU groups */ > struct page *pages[0]; > }; > > diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h > index d501246..bdfa140 100644 > --- a/arch/powerpc/include/asm/kvm_ppc.h > +++ b/arch/powerpc/include/asm/kvm_ppc.h > @@ -139,6 +139,8 @@ extern void kvmppc_xics_free(struct kvm *kvm); > > extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, > struct kvm_create_spapr_tce *args); > +extern long kvm_vm_ioctl_create_spapr_tce_iommu(struct kvm *kvm, > + struct kvm_create_spapr_tce_iommu *args); > extern struct kvmppc_spapr_tce_table *kvmppc_find_tce_table( > struct kvm_vcpu *vcpu, unsigned long liobn); > extern long kvmppc_emulated_h_put_tce(struct kvmppc_spapr_tce_table *stt, > diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h > index 681b314..b67d44b 100644 > --- a/arch/powerpc/include/uapi/asm/kvm.h > +++ b/arch/powerpc/include/uapi/asm/kvm.h > @@ -291,6 +291,13 @@ struct kvm_create_spapr_tce { > __u32 window_size; > }; > > +/* for KVM_CAP_SPAPR_TCE_IOMMU */ > +struct kvm_create_spapr_tce_iommu { > + __u64 liobn; > + __u32 iommu_id; > + __u32 flags; > +}; > + > /* for KVM_ALLOCATE_RMA */ > struct kvm_allocate_rma { > __u64 rma_size; > diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c > index 643ac1e..98cf949 100644 > --- a/arch/powerpc/kvm/book3s_64_vio.c > +++ 
b/arch/powerpc/kvm/book3s_64_vio.c > @@ -27,6 +27,9 @@ > #include <linux/hugetlb.h> > #include <linux/list.h> > #include <linux/anon_inodes.h> > +#include <linux/pci.h> > +#include <linux/iommu.h> > +#include <linux/module.h> > > #include <asm/tlbflush.h> > #include <asm/kvm_ppc.h> > @@ -38,10 +41,19 @@ > #include <asm/kvm_host.h> > #include <asm/udbg.h> > #include <asm/iommu.h> > +#include <asm/tce.h> > + > +#define DRIVER_VERSION "0.1" > +#define DRIVER_AUTHOR "Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>" > +#define DRIVER_DESC "POWERPC KVM driver" Really? > > #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) > #define ERROR_ADDR (~(unsigned long)0x0) > > +static bool kvmppc_tce_virt_only = false; > +module_param_named(virt_only, kvmppc_tce_virt_only, bool, S_IRUGO | S_IWUSR); > +MODULE_PARM_DESC(virt_only, "Disable realmode handling of IOMMU map/unmap"); > + > /* > * TCE tables handlers. > */ > @@ -58,8 +70,13 @@ static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) > > mutex_lock(&kvm->lock); > list_del(&stt->list); > - for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) > - __free_page(stt->pages[i]); > +#ifdef CONFIG_IOMMU_API > + if (stt->grp) { > + iommu_group_put(stt->grp); > + } else > +#endif > + for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) > + __free_page(stt->pages[i]); > kfree(stt); > mutex_unlock(&kvm->lock); > > @@ -155,9 +172,127 @@ fail: > return ret; > } > > +#ifdef CONFIG_IOMMU_API > +static const struct file_operations kvm_spapr_tce_iommu_fops = { > + .release = kvm_spapr_tce_release, > +}; > + > +long kvm_vm_ioctl_create_spapr_tce_iommu(struct kvm *kvm, > + struct kvm_create_spapr_tce_iommu *args) > +{ > + struct kvmppc_spapr_tce_table *tt = NULL; > + struct iommu_group *grp; > + struct iommu_table *tbl; > + > + /* Find an IOMMU table for the given ID */ > + grp = iommu_group_get_by_id(args->iommu_id); > + if (!grp) > + return -ENXIO; > + > + tbl = iommu_group_get_iommudata(grp); > + if (!tbl) > + return 
-ENXIO; > + > + /* Check this LIOBN hasn't been previously allocated */ > + list_for_each_entry(tt, &kvm->arch.spapr_tce_tables, list) { > + if (tt->liobn == args->liobn) > + return -EBUSY; > + } > + > + tt = kzalloc(sizeof(*tt), GFP_KERNEL); > + if (!tt) > + return -ENOMEM; > + > + tt->liobn = args->liobn; > + tt->kvm = kvm; > + tt->virtmode_only = kvmppc_tce_virt_only; > + tt->grp = grp; > + > + kvm_get_kvm(kvm); > + > + mutex_lock(&kvm->lock); > + list_add(&tt->list, &kvm->arch.spapr_tce_tables); > + > + mutex_unlock(&kvm->lock); > + > + pr_debug("LIOBN=%llX hooked to IOMMU %d, flags=%u\n", > + args->liobn, args->iommu_id, args->flags); > + > + return anon_inode_getfd("kvm-spapr-tce-iommu", > + &kvm_spapr_tce_iommu_fops, tt, O_RDWR); > +} > +#else > +long kvm_vm_ioctl_create_spapr_tce_iommu(struct kvm *kvm, > + struct kvm_create_spapr_tce_iommu *args) > +{ > + return -ENOSYS; > +} > +#endif /* CONFIG_IOMMU_API */ > + > +#ifdef CONFIG_IOMMU_API > /* > * Virtual mode handling of IOMMU map/unmap. 
> */ > +static int clear_tce_virt_mode(struct iommu_table *tbl, > + unsigned long ioba, unsigned long tce_value, > + unsigned long npages) > +{ > + int ret; > + unsigned long entry = ioba >> IOMMU_PAGE_SHIFT; > + > + ret = iommu_tce_clear_param_check(tbl, ioba, tce_value, npages); > + if (ret) > + return ret; > + > + ret = iommu_clear_tces_and_put_pages(tbl, entry, npages); > + if (ret < 0) > + pr_err("iommu_tce: %s failed ioba=%lx, tce_value=%lx ret=%d\n", > + __func__, ioba, tce_value, ret); > + > + return ret; > +} > + > +static int put_tce_virt_mode(struct kvmppc_spapr_tce_table *tt, > + struct iommu_table *tbl, > + unsigned long ioba, unsigned long tce, > + pte_t pte, unsigned long pg_size) > +{ > + int ret; > + unsigned long entry = ioba >> IOMMU_PAGE_SHIFT; > + > + ret = iommu_tce_put_param_check(tbl, ioba, tce); > + if (ret) > + return ret; > + > + /* System page size case, easy to handle */ > + if (pg_size == PAGE_SIZE) > + return iommu_put_tce_user_mode(tbl, entry, tce); > + > + return -EAGAIN; > +} > + > +static pte_t va_to_linux_pte(struct kvm_vcpu *vcpu, > + unsigned long hva, bool writing, unsigned long *pg_sizep) > +{ > +#ifdef CONFIG_KVM_BOOK3S_64_HV > + /* Find out the page pte and size if requested */ > + pte_t pte; > + unsigned long pg_size = 0; > + > + pte = lookup_linux_pte(vcpu->arch.pgdir, hva, > + writing, &pg_size); > + if (!pte_present(pte)) > + return 0; > + > + *pg_sizep = pg_size; > + > + return pte; > +#else > + return 0; > +#endif > +} > +#endif /* CONFIG_IOMMU_API */ > + > /* Converts guest physical address into host virtual */ > static unsigned long get_virt_address(struct kvm_vcpu *vcpu, > unsigned long gpa) > @@ -188,6 +323,43 @@ long kvmppc_virtmode_h_put_tce(struct kvm_vcpu *vcpu, > if (!tt) > return H_TOO_HARD; > > +#ifdef CONFIG_IOMMU_API > + if (tt->grp) { > + long ret; > + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); > + > + /* Return error if the group is being destroyed */ > + if (!tbl) > + return 
H_RESCINDED; > + > + if (tce & (TCE_PCI_READ | TCE_PCI_WRITE)) { > + unsigned long hpa, pg_size = 0; > + pte_t pte; > + > + hpa = get_virt_address(vcpu, tce); > + if (hpa == ERROR_ADDR) > + return -EFAULT; > + > + pte = va_to_linux_pte(vcpu, hpa, tce & TCE_PCI_WRITE, > + &pg_size); > + if (!pte) > + return -EFAULT; > + > + ret = put_tce_virt_mode(tt, tbl, ioba, hpa, > + pte, pg_size); > + } else { > + ret = clear_tce_virt_mode(tbl, ioba, 0, 1); > + } > + iommu_flush_tce(tbl); > + > + WARN_ON(ret == -EAGAIN); > + if (ret < 0) > + return H_PARAMETER; > + > + return H_SUCCESS; > + } > +#endif > + > /* Emulated IO */ > return kvmppc_emulated_h_put_tce(tt, ioba, tce); > } > @@ -213,6 +385,52 @@ long kvmppc_virtmode_h_put_tce_indirect(struct kvm_vcpu *vcpu, > if (tces == ERROR_ADDR) > return H_TOO_HARD; > > +#ifdef CONFIG_IOMMU_API > + if (tt->grp) { > + long ret = 0; > + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); > + > + /* Return error if the group is being destroyed */ > + if (!tbl) > + return H_RESCINDED; > + > + for (i = 0; i < npages; ++i) { > + unsigned long hpa, pg_size = 0; > + pte_t pte = 0; > + unsigned long tce; > + unsigned long ptce = tces + i * sizeof(unsigned long); > + > + if (get_user(tce, (unsigned long __user *)ptce)) > + break; > + > + hpa = get_virt_address(vcpu, tce); > + if (hpa == ERROR_ADDR) > + return -EFAULT; > + > + pte = va_to_linux_pte(vcpu, hpa, > + tce & TCE_PCI_WRITE, &pg_size); > + if (!pte) > + return -EFAULT; > + > + ret = put_tce_virt_mode(tt, tbl, > + ioba + (i << IOMMU_PAGE_SHIFT), > + hpa, pte, pg_size); > + if (ret) > + break; > + } > + if (ret) > + clear_tce_virt_mode(tbl, ioba, 0, i); > + > + iommu_flush_tce(tbl); > + > + WARN_ON(ret == -EAGAIN); > + if (ret < 0) > + return H_PARAMETER; > + > + return H_SUCCESS; > + } > +#endif > + > /* Emulated IO */ > if ((ioba + (npages << IOMMU_PAGE_SHIFT)) > tt->window_size) > return H_PARAMETER; > @@ -253,6 +471,26 @@ long kvmppc_virtmode_h_stuff_tce(struct kvm_vcpu 
*vcpu, > if (!tt) > return H_TOO_HARD; > > +#ifdef CONFIG_IOMMU_API > + if (tt->grp) { > + long ret; > + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); > + > + /* Return error if the group is being destroyed */ > + if (!tbl) > + return H_RESCINDED; > + > + ret = clear_tce_virt_mode(tbl, ioba, > + tce_value, npages); > + > + WARN_ON(ret == -EAGAIN); > + if (ret < 0) > + return H_PARAMETER; > + > + return H_SUCCESS; > + } > +#endif > + > /* Emulated IO */ > if ((ioba + (npages << IOMMU_PAGE_SHIFT)) > tt->window_size) > return H_PARAMETER; > diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c > index 55fdf7a..c5e5905 100644 > --- a/arch/powerpc/kvm/book3s_64_vio_hv.c > +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c > @@ -26,6 +26,7 @@ > #include <linux/slab.h> > #include <linux/hugetlb.h> > #include <linux/list.h> > +#include <linux/iommu.h> > > #include <asm/tlbflush.h> > #include <asm/kvm_ppc.h> > @@ -161,6 +162,85 @@ static unsigned long get_real_address(struct kvm_vcpu *vcpu, > return hwaddr; > } > > +#ifdef CONFIG_IOMMU_API > +static int clear_tce_real_mode(struct iommu_table *tbl, > + unsigned long ioba, > + unsigned long tce_value, unsigned long npages) > +{ > + int ret; > + unsigned long entry = ioba >> IOMMU_PAGE_SHIFT; > + > + ret = iommu_tce_clear_param_check(tbl, ioba, tce_value, npages); > + if (ret) > + return ret; > + > + for ( ; npages; --npages, ++entry) { > + struct page *page; > + unsigned long oldtce; > + > + oldtce = iommu_clear_tce(tbl, entry); > + if (!oldtce) > + continue; > + > + page = realmode_pfn_to_page(oldtce >> PAGE_SHIFT); > + if (!page) { > + ret = -EAGAIN; > + break; > + } > + > + if (oldtce & TCE_PCI_WRITE) > + SetPageDirty(page); > + > + ret = realmode_put_page(page); > + if (ret) > + break; > + } > + /* if (ret < 0) > + pr_err("iommu_tce: %s failed ioba=%lx, tce_value=%lx ret=%d\n", > + __func__, ioba, tce_value, ret); */ > + > + return ret; > +} > + > +static int 
put_tce_real_mode(struct kvmppc_spapr_tce_table *tt, > + struct iommu_table *tbl, > + unsigned long ioba, unsigned long tce, > + pte_t pte, unsigned long pg_size) > +{ > + int ret; > + unsigned long entry = ioba >> IOMMU_PAGE_SHIFT; > + struct page *page = NULL; > + enum dma_data_direction direction = iommu_tce_direction(tce); > + > + ret = iommu_tce_put_param_check(tbl, ioba, tce); > + if (ret) > + return ret; > + > + if (pg_size != PAGE_SIZE) > + return -EAGAIN; > + > + /* Small page case, find page struct to increment a counter */ > + page = realmode_pfn_to_page(tce >> PAGE_SHIFT); > + if (!page) > + return -EAGAIN; > + > + ret = realmode_get_page(page); > + if (ret) > + return ret; > + > + /* tce_build accepts virtual addresses */ > + ret = iommu_tce_build(tbl, entry, (unsigned long) __va(tce), direction); > + if (ret) > + realmode_put_page(page); > + > + /* if (ret < 0) > + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", > + __func__, ioba, tce, ret); */ > + > + return ret; > +} > +#endif /* CONFIG_IOMMU_API */ > + > long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, > unsigned long ioba, unsigned long tce) > { > @@ -171,6 +251,44 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, > if (!tt) > return H_TOO_HARD; > > + if (tt->virtmode_only) > + return H_TOO_HARD; > + > +#ifdef CONFIG_IOMMU_API > + if (tt->grp) { > + long ret; > + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); > + > + /* Return error if the group is being destroyed */ > + if (!tbl) > + return H_RESCINDED; > + > + if (tce & (TCE_PCI_READ | TCE_PCI_WRITE)) { > + unsigned long hpa, pg_size = 0; > + pte_t pte = 0; > + > + hpa = get_real_address(vcpu, tce, tce & TCE_PCI_WRITE, > + &pte, &pg_size); > + if (hpa == ERROR_ADDR) > + return H_TOO_HARD; > + > + ret = put_tce_real_mode(tt, tbl, ioba, > + hpa, pte, pg_size); > + } else { > + ret = clear_tce_real_mode(tbl, ioba, 0, 1); > + } > + iommu_flush_tce(tbl); > + > + if (ret == -EAGAIN) > + 
return H_TOO_HARD; > + > + if (ret < 0) > + return H_PARAMETER; > + > + return H_SUCCESS; > + } > +#endif > + > /* Emulated IO */ > return kvmppc_emulated_h_put_tce(tt, ioba, tce); > } > @@ -192,10 +310,58 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, > if (!tt) > return H_TOO_HARD; > > + if (tt->virtmode_only) > + return H_TOO_HARD; > + > tces = get_real_address(vcpu, tce_list, false, NULL, NULL); > if (tces == ERROR_ADDR) > return H_TOO_HARD; > > +#ifdef CONFIG_IOMMU_API > + if (tt->grp) { > + long ret = 0; > + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); > + > + /* Return error if the group is being destroyed */ > + if (!tbl) > + return H_RESCINDED; > + > + for (i = 0; i < npages; ++i) { > + unsigned long hpa, pg_size = 0; > + pte_t pte = 0; > + unsigned long tce; > + unsigned long ptce = tces + i * sizeof(unsigned long); > + > + if (get_user(tce, (unsigned long __user *)ptce)) > + break; > + > + hpa = get_real_address(vcpu, tce, > + tce & TCE_PCI_WRITE, > + &pte, &pg_size); > + if (hpa == ERROR_ADDR) > + ret = -EAGAIN; > + else > + ret = put_tce_real_mode(tt, tbl, > + ioba + (i << IOMMU_PAGE_SHIFT), > + hpa, pte, pg_size); > + if (ret) > + break; > + } > + if (ret) > + clear_tce_real_mode(tbl, ioba, 0, i); > + > + iommu_flush_tce(tbl); > + > + if (ret == -EAGAIN) > + return H_TOO_HARD; > + > + if (ret < 0) > + return H_PARAMETER; > + > + return H_SUCCESS; > + } > +#endif > + > /* Emulated IO */ > if ((ioba + (npages << IOMMU_PAGE_SHIFT)) > tt->window_size) > return H_PARAMETER; > @@ -236,6 +402,32 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, > if (!tt) > return H_TOO_HARD; > > + if (tt->virtmode_only) > + return H_TOO_HARD; > + > +#ifdef CONFIG_IOMMU_API > + if (tt->grp) { > + long ret; > + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); > + > + /* Return error if the group is being destroyed */ > + if (!tbl) > + return H_RESCINDED; > + > + ret = clear_tce_real_mode(tbl, ioba, > + tce_value, npages); > + 
iommu_flush_tce(tbl); > + > + if (ret == -EAGAIN) > + return H_TOO_HARD; > + > + if (ret < 0) > + return H_PARAMETER; > + > + return H_SUCCESS; > + } > +#endif > + > /* Emulated IO */ > if ((ioba + (npages << IOMMU_PAGE_SHIFT)) > tt->window_size) > return H_PARAMETER; > diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c > index b7ad589..269b0f6 100644 > --- a/arch/powerpc/kvm/powerpc.c > +++ b/arch/powerpc/kvm/powerpc.c > @@ -385,6 +385,7 @@ int kvm_dev_ioctl_check_extension(long ext) > break; > #endif > case KVM_CAP_SPAPR_MULTITCE: > + case KVM_CAP_SPAPR_TCE_IOMMU: > r = 1; > break; > default: > @@ -935,6 +936,17 @@ long kvm_arch_vm_ioctl(struct file *filp, > r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); > goto out; > } > + case KVM_CREATE_SPAPR_TCE_IOMMU: { > + struct kvm_create_spapr_tce_iommu create_tce_iommu; > + struct kvm *kvm = filp->private_data; > + > + r = -EFAULT; > + if (copy_from_user(&create_tce_iommu, argp, > + sizeof(create_tce_iommu))) > + goto out; > + r = kvm_vm_ioctl_create_spapr_tce_iommu(kvm, &create_tce_iommu); > + goto out; > + } > #endif /* CONFIG_PPC_BOOK3S_64 */ > > #ifdef CONFIG_KVM_BOOK3S_64_HV > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 6c04da1..161e1d3 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -641,6 +641,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_PPC_RTAS (0x100000 + 87) > #define KVM_CAP_SPAPR_XICS (0x100000 + 88) > #define KVM_CAP_SPAPR_MULTITCE (0x110000 + 89) > +#define KVM_CAP_SPAPR_TCE_IOMMU (0x110000 + 90) > > #ifdef KVM_CAP_IRQ_ROUTING > > @@ -885,6 +886,7 @@ struct kvm_s390_ucas_mapping { > #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) > /* Available with KVM_CAP_PPC_RTAS */ > #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xdc, struct kvm_rtas_token_args) > +#define KVM_CREATE_SPAPR_TCE_IOMMU _IOW(KVMIO, 0xaf, struct kvm_create_spapr_tce_iommu) > > /* > * ioctls for vcpu fds
On 05/07/2013 03:29 PM, David Gibson wrote: > On Mon, May 06, 2013 at 05:25:56PM +1000, Alexey Kardashevskiy wrote: >> This allows the host kernel to handle H_PUT_TCE, H_PUT_TCE_INDIRECT >> and H_STUFF_TCE requests without passing them to QEMU, which should >> save time on switching to QEMU and back. >> >> Both real and virtual modes are supported - whenever the kernel >> fails to handle a TCE request in real mode, it passes it to the virtual mode. >> If the virtual mode handlers fail, then the request is passed >> to the user mode, for example, to QEMU. >> >> This adds a new KVM_CAP_SPAPR_TCE_IOMMU ioctl to associate >> a virtual PCI bus ID (LIOBN) with an IOMMU group, which enables >> in-kernel handling of IOMMU map/unmap. >> >> This adds a special case for huge pages (16MB). The reference >> counting cannot be easily done for such pages in real mode (when >> MMU is off) so we added a list of huge pages. It is populated in >> virtual mode and get_page is called just once per huge page. >> Real mode handlers check if the requested page is huge and in the list, >> then no reference counting is done, otherwise an exit to virtual mode >> happens. The list is released at KVM exit. At the moment the fastest >> card available for tests uses up to 9 huge pages so walking through this >> list is not very expensive. However this can change and we may want >> to optimize this. >> >> This also adds the virt_only parameter to the KVM module >> for debug and performance check purposes. >> >> Tests show that this patch increases transmission speed from 220MB/s >> to 750..1020MB/s on 10Gb network (Chelsio CXGB3 10Gb ethernet card). 
>> >> Cc: David Gibson <david@gibson.dropbear.id.au> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >> Signed-off-by: Paul Mackerras <paulus@samba.org> >> --- >> Documentation/virtual/kvm/api.txt | 28 ++++ >> arch/powerpc/include/asm/kvm_host.h | 2 + >> arch/powerpc/include/asm/kvm_ppc.h | 2 + >> arch/powerpc/include/uapi/asm/kvm.h | 7 + >> arch/powerpc/kvm/book3s_64_vio.c | 242 ++++++++++++++++++++++++++++++++++- >> arch/powerpc/kvm/book3s_64_vio_hv.c | 192 +++++++++++++++++++++++++++ >> arch/powerpc/kvm/powerpc.c | 12 ++ >> include/uapi/linux/kvm.h | 2 + >> 8 files changed, 485 insertions(+), 2 deletions(-) >> >> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt >> index f621cd6..2039767 100644 >> --- a/Documentation/virtual/kvm/api.txt >> +++ b/Documentation/virtual/kvm/api.txt >> @@ -2127,6 +2127,34 @@ written, then `n_invalid' invalid entries, invalidating any previously >> valid entries found. >> >> >> +4.79 KVM_CREATE_SPAPR_TCE_IOMMU >> + >> +Capability: KVM_CAP_SPAPR_TCE_IOMMU >> +Architectures: powerpc >> +Type: vm ioctl >> +Parameters: struct kvm_create_spapr_tce_iommu (in) >> +Returns: 0 on success, -1 on error >> + >> +This creates a link between IOMMU group and a hardware TCE (translation >> +control entry) table. This link lets the host kernel know what IOMMU >> +group (i.e. TCE table) to use for the LIOBN number passed with >> +H_PUT_TCE, H_PUT_TCE_INDIRECT, H_STUFF_TCE hypercalls. >> + >> +/* for KVM_CAP_SPAPR_TCE_IOMMU */ >> +struct kvm_create_spapr_tce_iommu { >> + __u64 liobn; >> + __u32 iommu_id; > > Wouldn't it be more in keeping pardon? >> + __u32 flags; >> +}; >> + >> +No flag is supported at the moment. >> + >> +When the guest issues TCE call on a liobn for which a TCE table has been >> +registered, the kernel will handle it in real mode, updating the hardware >> +TCE table. TCE table calls for other liobns will cause a vm exit and must >> +be handled by userspace. >> + >> + >> 5. 
The kvm_run structure >> ------------------------ >> >> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h >> index 36ceb0d..2b70cbc 100644 >> --- a/arch/powerpc/include/asm/kvm_host.h >> +++ b/arch/powerpc/include/asm/kvm_host.h >> @@ -178,6 +178,8 @@ struct kvmppc_spapr_tce_table { >> struct kvm *kvm; >> u64 liobn; >> u32 window_size; >> + bool virtmode_only; > > I see this is now initialized from the global parameter, but I think > it would be better to just check the global (debug) parameter > directly, rather than duplicating it here. The global parameter is in kvm.ko and the struct above is in the real mode part which cannot go to the module. >> + struct iommu_group *grp; /* used for IOMMU groups */ >> struct page *pages[0]; >> }; >> >> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h >> index d501246..bdfa140 100644 >> --- a/arch/powerpc/include/asm/kvm_ppc.h >> +++ b/arch/powerpc/include/asm/kvm_ppc.h >> @@ -139,6 +139,8 @@ extern void kvmppc_xics_free(struct kvm *kvm); >> >> extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, >> struct kvm_create_spapr_tce *args); >> +extern long kvm_vm_ioctl_create_spapr_tce_iommu(struct kvm *kvm, >> + struct kvm_create_spapr_tce_iommu *args); >> extern struct kvmppc_spapr_tce_table *kvmppc_find_tce_table( >> struct kvm_vcpu *vcpu, unsigned long liobn); >> extern long kvmppc_emulated_h_put_tce(struct kvmppc_spapr_tce_table *stt, >> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h >> index 681b314..b67d44b 100644 >> --- a/arch/powerpc/include/uapi/asm/kvm.h >> +++ b/arch/powerpc/include/uapi/asm/kvm.h >> @@ -291,6 +291,13 @@ struct kvm_create_spapr_tce { >> __u32 window_size; >> }; >> >> +/* for KVM_CAP_SPAPR_TCE_IOMMU */ >> +struct kvm_create_spapr_tce_iommu { >> + __u64 liobn; >> + __u32 iommu_id; >> + __u32 flags; >> +}; >> + >> /* for KVM_ALLOCATE_RMA */ >> struct kvm_allocate_rma { >> __u64 rma_size; >> 
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c >> index 643ac1e..98cf949 100644 >> --- a/arch/powerpc/kvm/book3s_64_vio.c >> +++ b/arch/powerpc/kvm/book3s_64_vio.c >> @@ -27,6 +27,9 @@ >> #include <linux/hugetlb.h> >> #include <linux/list.h> >> #include <linux/anon_inodes.h> >> +#include <linux/pci.h> >> +#include <linux/iommu.h> >> +#include <linux/module.h> >> >> #include <asm/tlbflush.h> >> #include <asm/kvm_ppc.h> >> @@ -38,10 +41,19 @@ >> #include <asm/kvm_host.h> >> #include <asm/udbg.h> >> #include <asm/iommu.h> >> +#include <asm/tce.h> >> + >> +#define DRIVER_VERSION "0.1" >> +#define DRIVER_AUTHOR "Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>" >> +#define DRIVER_DESC "POWERPC KVM driver" > > Really? What is wrong here?
On Tue, May 07, 2013 at 03:51:31PM +1000, Alexey Kardashevskiy wrote: > On 05/07/2013 03:29 PM, David Gibson wrote: > > On Mon, May 06, 2013 at 05:25:56PM +1000, Alexey Kardashevskiy wrote: > >> This allows the host kernel to handle H_PUT_TCE, H_PUT_TCE_INDIRECT > >> and H_STUFF_TCE requests without passing them to QEMU, which should > >> save time on switching to QEMU and back. > >> > >> Both real and virtual modes are supported - whenever the kernel > >> fails to handle a TCE request in real mode, it passes it to the virtual mode. > >> If the virtual mode handlers fail, then the request is passed > >> to the user mode, for example, to QEMU. > >> > >> This adds a new KVM_CAP_SPAPR_TCE_IOMMU ioctl to associate > >> a virtual PCI bus ID (LIOBN) with an IOMMU group, which enables > >> in-kernel handling of IOMMU map/unmap. > >> > >> This adds a special case for huge pages (16MB). The reference > >> counting cannot be easily done for such pages in real mode (when > >> MMU is off) so we added a list of huge pages. It is populated in > >> virtual mode and get_page is called just once per huge page. > >> Real mode handlers check if the requested page is huge and in the list, > >> then no reference counting is done, otherwise an exit to virtual mode > >> happens. The list is released at KVM exit. At the moment the fastest > >> card available for tests uses up to 9 huge pages so walking through this > >> list is not very expensive. However this can change and we may want > >> to optimize this. > >> > >> This also adds the virt_only parameter to the KVM module > >> for debug and performance check purposes. > >> > >> Tests show that this patch increases transmission speed from 220MB/s > >> to 750..1020MB/s on 10Gb network (Chelsio CXGB3 10Gb ethernet card). 
> >> > >> Cc: David Gibson <david@gibson.dropbear.id.au> > >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > >> Signed-off-by: Paul Mackerras <paulus@samba.org> > >> --- > >> Documentation/virtual/kvm/api.txt | 28 ++++ > >> arch/powerpc/include/asm/kvm_host.h | 2 + > >> arch/powerpc/include/asm/kvm_ppc.h | 2 + > >> arch/powerpc/include/uapi/asm/kvm.h | 7 + > >> arch/powerpc/kvm/book3s_64_vio.c | 242 ++++++++++++++++++++++++++++++++++- > >> arch/powerpc/kvm/book3s_64_vio_hv.c | 192 +++++++++++++++++++++++++++ > >> arch/powerpc/kvm/powerpc.c | 12 ++ > >> include/uapi/linux/kvm.h | 2 + > >> 8 files changed, 485 insertions(+), 2 deletions(-) > >> > >> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > >> index f621cd6..2039767 100644 > >> --- a/Documentation/virtual/kvm/api.txt > >> +++ b/Documentation/virtual/kvm/api.txt > >> @@ -2127,6 +2127,34 @@ written, then `n_invalid' invalid entries, invalidating any previously > >> valid entries found. > >> > >> > >> +4.79 KVM_CREATE_SPAPR_TCE_IOMMU > >> + > >> +Capability: KVM_CAP_SPAPR_TCE_IOMMU > >> +Architectures: powerpc > >> +Type: vm ioctl > >> +Parameters: struct kvm_create_spapr_tce_iommu (in) > >> +Returns: 0 on success, -1 on error > >> + > >> +This creates a link between IOMMU group and a hardware TCE (translation > >> +control entry) table. This link lets the host kernel know what IOMMU > >> +group (i.e. TCE table) to use for the LIOBN number passed with > >> +H_PUT_TCE, H_PUT_TCE_INDIRECT, H_STUFF_TCE hypercalls. > >> + > >> +/* for KVM_CAP_SPAPR_TCE_IOMMU */ > >> +struct kvm_create_spapr_tce_iommu { > >> + __u64 liobn; > >> + __u32 iommu_id; > > > > Wouldn't it be more in keeping > > > pardon? Sorry, I was going to suggest a change, but then realised it wasn't actually any better than what you have now. > >> + __u32 flags; > >> +}; > >> + > >> +No flag is supported at the moment. 
> >> + > >> +When the guest issues TCE call on a liobn for which a TCE table has been > >> +registered, the kernel will handle it in real mode, updating the hardware > >> +TCE table. TCE table calls for other liobns will cause a vm exit and must > >> +be handled by userspace. > >> + > >> + > >> 5. The kvm_run structure > >> ------------------------ > >> > >> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h > >> index 36ceb0d..2b70cbc 100644 > >> --- a/arch/powerpc/include/asm/kvm_host.h > >> +++ b/arch/powerpc/include/asm/kvm_host.h > >> @@ -178,6 +178,8 @@ struct kvmppc_spapr_tce_table { > >> struct kvm *kvm; > >> u64 liobn; > >> u32 window_size; > >> + bool virtmode_only; > > > > I see this is now initialized from the global parameter, but I think > > it would be better to just check the global (debug) parameter > > directly, rather than duplicating it here. > > > The global parameter is in kvm.ko and the struct above is in the real mode > part which cannot go to the module. Ah, ok. I'm half inclined to just drop the virtmode_only thing entirely. 
> >> + struct iommu_group *grp; /* used for IOMMU groups */ > >> struct page *pages[0]; > >> }; > >> > >> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h > >> index d501246..bdfa140 100644 > >> --- a/arch/powerpc/include/asm/kvm_ppc.h > >> +++ b/arch/powerpc/include/asm/kvm_ppc.h > >> @@ -139,6 +139,8 @@ extern void kvmppc_xics_free(struct kvm *kvm); > >> > >> extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, > >> struct kvm_create_spapr_tce *args); > >> +extern long kvm_vm_ioctl_create_spapr_tce_iommu(struct kvm *kvm, > >> + struct kvm_create_spapr_tce_iommu *args); > >> extern struct kvmppc_spapr_tce_table *kvmppc_find_tce_table( > >> struct kvm_vcpu *vcpu, unsigned long liobn); > >> extern long kvmppc_emulated_h_put_tce(struct kvmppc_spapr_tce_table *stt, > >> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h > >> index 681b314..b67d44b 100644 > >> --- a/arch/powerpc/include/uapi/asm/kvm.h > >> +++ b/arch/powerpc/include/uapi/asm/kvm.h > >> @@ -291,6 +291,13 @@ struct kvm_create_spapr_tce { > >> __u32 window_size; > >> }; > >> > >> +/* for KVM_CAP_SPAPR_TCE_IOMMU */ > >> +struct kvm_create_spapr_tce_iommu { > >> + __u64 liobn; > >> + __u32 iommu_id; > >> + __u32 flags; > >> +}; > >> + > >> /* for KVM_ALLOCATE_RMA */ > >> struct kvm_allocate_rma { > >> __u64 rma_size; > >> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c > >> index 643ac1e..98cf949 100644 > >> --- a/arch/powerpc/kvm/book3s_64_vio.c > >> +++ b/arch/powerpc/kvm/book3s_64_vio.c > >> @@ -27,6 +27,9 @@ > >> #include <linux/hugetlb.h> > >> #include <linux/list.h> > >> #include <linux/anon_inodes.h> > >> +#include <linux/pci.h> > >> +#include <linux/iommu.h> > >> +#include <linux/module.h> > >> > >> #include <asm/tlbflush.h> > >> #include <asm/kvm_ppc.h> > >> @@ -38,10 +41,19 @@ > >> #include <asm/kvm_host.h> > >> #include <asm/udbg.h> > >> #include <asm/iommu.h> > >> +#include <asm/tce.h> 
> >> + > >> +#define DRIVER_VERSION "0.1" > >> +#define DRIVER_AUTHOR "Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>" > >> +#define DRIVER_DESC "POWERPC KVM driver" > > > > Really? > > > What is wrong here? Well, it seems entirely unrelated to the rest of the changes, and not obviously accurate.
On 05/07/2013 04:02 PM, David Gibson wrote: > On Tue, May 07, 2013 at 03:51:31PM +1000, Alexey Kardashevskiy wrote: >> On 05/07/2013 03:29 PM, David Gibson wrote: >>> On Mon, May 06, 2013 at 05:25:56PM +1000, Alexey Kardashevskiy wrote: >>>> This allows the host kernel to handle H_PUT_TCE, H_PUT_TCE_INDIRECT >>>> and H_STUFF_TCE requests without passing them to QEMU, which should >>>> save time on switching to QEMU and back. >>>> >>>> Both real and virtual modes are supported - whenever the kernel >>>> fails to handle a TCE request, it passes it to the virtual mode. >>>> If the virtual mode handlers fail, then the request is passed >>>> to the user mode, for example, to QEMU. >>>> >>>> This adds a new KVM_CREATE_SPAPR_TCE_IOMMU ioctl to associate >>>> a virtual PCI bus ID (LIOBN) with an IOMMU group, which enables >>>> in-kernel handling of IOMMU map/unmap. >>>> >>>> This adds a special case for huge pages (16MB). The reference >>>> counting cannot be easily done for such pages in real mode (when >>>> MMU is off) so we added a list of huge pages. It is populated in >>>> virtual mode and get_page is called just once per huge page. >>>> Real mode handlers check if the requested page is huge and in the list, >>>> then no reference counting is done, otherwise an exit to virtual mode >>>> happens. The list is released at KVM exit. At the moment the fastest >>>> card available for tests uses up to 9 huge pages so walking through this >>>> list is not very expensive. However this can change and we may want >>>> to optimize this. >>>> >>>> This also adds the virt_only parameter to the KVM module >>>> for debug and performance check purposes. >>>> >>>> Tests show that this patch increases transmission speed from 220MB/s >>>> to 750..1020MB/s on 10Gb network (Chelsio CXGB3 10Gb ethernet card). 
>>>> >>>> Cc: David Gibson <david@gibson.dropbear.id.au> >>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >>>> Signed-off-by: Paul Mackerras <paulus@samba.org> >>>> --- >>>> Documentation/virtual/kvm/api.txt | 28 ++++ >>>> arch/powerpc/include/asm/kvm_host.h | 2 + >>>> arch/powerpc/include/asm/kvm_ppc.h | 2 + >>>> arch/powerpc/include/uapi/asm/kvm.h | 7 + >>>> arch/powerpc/kvm/book3s_64_vio.c | 242 ++++++++++++++++++++++++++++++++++- >>>> arch/powerpc/kvm/book3s_64_vio_hv.c | 192 +++++++++++++++++++++++++++ >>>> arch/powerpc/kvm/powerpc.c | 12 ++ >>>> include/uapi/linux/kvm.h | 2 + >>>> 8 files changed, 485 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt >>>> index f621cd6..2039767 100644 >>>> --- a/Documentation/virtual/kvm/api.txt >>>> +++ b/Documentation/virtual/kvm/api.txt >>>> @@ -2127,6 +2127,34 @@ written, then `n_invalid' invalid entries, invalidating any previously >>>> valid entries found. >>>> >>>> >>>> +4.79 KVM_CREATE_SPAPR_TCE_IOMMU >>>> + >>>> +Capability: KVM_CAP_SPAPR_TCE_IOMMU >>>> +Architectures: powerpc >>>> +Type: vm ioctl >>>> +Parameters: struct kvm_create_spapr_tce_iommu (in) >>>> +Returns: 0 on success, -1 on error >>>> + >>>> +This creates a link between IOMMU group and a hardware TCE (translation >>>> +control entry) table. This link lets the host kernel know what IOMMU >>>> +group (i.e. TCE table) to use for the LIOBN number passed with >>>> +H_PUT_TCE, H_PUT_TCE_INDIRECT, H_STUFF_TCE hypercalls. >>>> + >>>> +/* for KVM_CAP_SPAPR_TCE_IOMMU */ >>>> +struct kvm_create_spapr_tce_iommu { >>>> + __u64 liobn; >>>> + __u32 iommu_id; >>> >>> Wouldn't it be more in keeping >> >> >> pardon? > > Sorry, I was going to suggest a change, but then realised it wasn't > actually any better than what you have now. > >>>> + __u32 flags; >>>> +}; >>>> + >>>> +No flag is supported at the moment. 
>>>> + >>>> +When the guest issues TCE call on a liobn for which a TCE table has been >>>> +registered, the kernel will handle it in real mode, updating the hardware >>>> +TCE table. TCE table calls for other liobns will cause a vm exit and must >>>> +be handled by userspace. >>>> + >>>> + >>>> 5. The kvm_run structure >>>> ------------------------ >>>> >>>> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h >>>> index 36ceb0d..2b70cbc 100644 >>>> --- a/arch/powerpc/include/asm/kvm_host.h >>>> +++ b/arch/powerpc/include/asm/kvm_host.h >>>> @@ -178,6 +178,8 @@ struct kvmppc_spapr_tce_table { >>>> struct kvm *kvm; >>>> u64 liobn; >>>> u32 window_size; >>>> + bool virtmode_only; >>> >>> I see this is now initialized from the global parameter, but I think >>> it would be better to just check the global (debug) parameter >>> directly, rather than duplicating it here. >> >> >> The global parameter is in kvm.ko and the struct above is in the real mode >> part which cannot go to the module. > > Ah, ok. I'm half inclined to just drop the virtmode_only thing > entirely. 
> >>>> + struct iommu_group *grp; /* used for IOMMU groups */ >>>> struct page *pages[0]; >>>> }; >>>> >>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h >>>> index d501246..bdfa140 100644 >>>> --- a/arch/powerpc/include/asm/kvm_ppc.h >>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h >>>> @@ -139,6 +139,8 @@ extern void kvmppc_xics_free(struct kvm *kvm); >>>> >>>> extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, >>>> struct kvm_create_spapr_tce *args); >>>> +extern long kvm_vm_ioctl_create_spapr_tce_iommu(struct kvm *kvm, >>>> + struct kvm_create_spapr_tce_iommu *args); >>>> extern struct kvmppc_spapr_tce_table *kvmppc_find_tce_table( >>>> struct kvm_vcpu *vcpu, unsigned long liobn); >>>> extern long kvmppc_emulated_h_put_tce(struct kvmppc_spapr_tce_table *stt, >>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h >>>> index 681b314..b67d44b 100644 >>>> --- a/arch/powerpc/include/uapi/asm/kvm.h >>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h >>>> @@ -291,6 +291,13 @@ struct kvm_create_spapr_tce { >>>> __u32 window_size; >>>> }; >>>> >>>> +/* for KVM_CAP_SPAPR_TCE_IOMMU */ >>>> +struct kvm_create_spapr_tce_iommu { >>>> + __u64 liobn; >>>> + __u32 iommu_id; >>>> + __u32 flags; >>>> +}; >>>> + >>>> /* for KVM_ALLOCATE_RMA */ >>>> struct kvm_allocate_rma { >>>> __u64 rma_size; >>>> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c >>>> index 643ac1e..98cf949 100644 >>>> --- a/arch/powerpc/kvm/book3s_64_vio.c >>>> +++ b/arch/powerpc/kvm/book3s_64_vio.c >>>> @@ -27,6 +27,9 @@ >>>> #include <linux/hugetlb.h> >>>> #include <linux/list.h> >>>> #include <linux/anon_inodes.h> >>>> +#include <linux/pci.h> >>>> +#include <linux/iommu.h> >>>> +#include <linux/module.h> >>>> >>>> #include <asm/tlbflush.h> >>>> #include <asm/kvm_ppc.h> >>>> @@ -38,10 +41,19 @@ >>>> #include <asm/kvm_host.h> >>>> #include <asm/udbg.h> >>>> #include <asm/iommu.h> >>>> +#include 
<asm/tce.h> >>>> + >>>> +#define DRIVER_VERSION "0.1" >>>> +#define DRIVER_AUTHOR "Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>" >>>> +#define DRIVER_DESC "POWERPC KVM driver" >>> >>> Really? >> >> >> What is wrong here? > > Well, it seems entirely unrelated to the rest of the changes, The patch adds a module parameter so I had to add those DRIVER_xxx. > and not obviously accurate. Let's fix it then. How? Paul signed it...
On Tue, May 07, 2013 at 04:27:49PM +1000, Alexey Kardashevskiy wrote: > On 05/07/2013 04:02 PM, David Gibson wrote: > > On Tue, May 07, 2013 at 03:51:31PM +1000, Alexey Kardashevskiy wrote: > >> On 05/07/2013 03:29 PM, David Gibson wrote: > >>> On Mon, May 06, 2013 at 05:25:56PM +1000, Alexey Kardashevskiy wrote: [snip] > >>>> +#define DRIVER_VERSION "0.1" > >>>> +#define DRIVER_AUTHOR "Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>" > >>>> +#define DRIVER_DESC "POWERPC KVM driver" > >>> > >>> Really? > >> > >> > >> What is wrong here? > > > > Well, it seems entirely unrelated to the rest of the changes, > > > The patch adds a module parameter so I had to add those DRIVER_xxx. Ah, ok. > > and not obviously accurate. > > Let's fix it then. How? Paul signed it... Fair enough then.
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f621cd6..2039767 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2127,6 +2127,34 @@ written, then `n_invalid' invalid entries, invalidating any previously valid entries found. +4.79 KVM_CREATE_SPAPR_TCE_IOMMU + +Capability: KVM_CAP_SPAPR_TCE_IOMMU +Architectures: powerpc +Type: vm ioctl +Parameters: struct kvm_create_spapr_tce_iommu (in) +Returns: a file descriptor on success, -1 on error + +This creates a link between an IOMMU group and a hardware TCE (translation +control entry) table. This link lets the host kernel know what IOMMU +group (i.e. TCE table) to use for the LIOBN number passed with +H_PUT_TCE, H_PUT_TCE_INDIRECT, H_STUFF_TCE hypercalls. + +/* for KVM_CAP_SPAPR_TCE_IOMMU */ +struct kvm_create_spapr_tce_iommu { + __u64 liobn; + __u32 iommu_id; + __u32 flags; +}; + +No flags are supported at the moment. + +When the guest issues a TCE call on a liobn for which a TCE table has been +registered, the kernel will handle it in real mode, updating the hardware +TCE table. TCE table calls for other liobns will cause a vm exit and must +be handled by userspace. + + 5. 
The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 36ceb0d..2b70cbc 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -178,6 +178,8 @@ struct kvmppc_spapr_tce_table { struct kvm *kvm; u64 liobn; u32 window_size; + bool virtmode_only; + struct iommu_group *grp; /* used for IOMMU groups */ struct page *pages[0]; }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index d501246..bdfa140 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -139,6 +139,8 @@ extern void kvmppc_xics_free(struct kvm *kvm); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); +extern long kvm_vm_ioctl_create_spapr_tce_iommu(struct kvm *kvm, + struct kvm_create_spapr_tce_iommu *args); extern struct kvmppc_spapr_tce_table *kvmppc_find_tce_table( struct kvm_vcpu *vcpu, unsigned long liobn); extern long kvmppc_emulated_h_put_tce(struct kvmppc_spapr_tce_table *stt, diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 681b314..b67d44b 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -291,6 +291,13 @@ struct kvm_create_spapr_tce { __u32 window_size; }; +/* for KVM_CAP_SPAPR_TCE_IOMMU */ +struct kvm_create_spapr_tce_iommu { + __u64 liobn; + __u32 iommu_id; + __u32 flags; +}; + /* for KVM_ALLOCATE_RMA */ struct kvm_allocate_rma { __u64 rma_size; diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 643ac1e..98cf949 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -27,6 +27,9 @@ #include <linux/hugetlb.h> #include <linux/list.h> #include <linux/anon_inodes.h> +#include <linux/pci.h> +#include <linux/iommu.h> +#include <linux/module.h> #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> @@ -38,10 +41,19 
@@ #include <asm/kvm_host.h> #include <asm/udbg.h> #include <asm/iommu.h> +#include <asm/tce.h> + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>" +#define DRIVER_DESC "POWERPC KVM driver" #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) #define ERROR_ADDR (~(unsigned long)0x0) +static bool kvmppc_tce_virt_only = false; +module_param_named(virt_only, kvmppc_tce_virt_only, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(virt_only, "Disable realmode handling of IOMMU map/unmap"); + /* * TCE tables handlers. */ @@ -58,8 +70,13 @@ static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) mutex_lock(&kvm->lock); list_del(&stt->list); - for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) - __free_page(stt->pages[i]); +#ifdef CONFIG_IOMMU_API + if (stt->grp) { + iommu_group_put(stt->grp); + } else +#endif + for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) + __free_page(stt->pages[i]); kfree(stt); mutex_unlock(&kvm->lock); @@ -155,9 +172,127 @@ fail: return ret; } +#ifdef CONFIG_IOMMU_API +static const struct file_operations kvm_spapr_tce_iommu_fops = { + .release = kvm_spapr_tce_release, +}; + +long kvm_vm_ioctl_create_spapr_tce_iommu(struct kvm *kvm, + struct kvm_create_spapr_tce_iommu *args) +{ + struct kvmppc_spapr_tce_table *tt = NULL; + struct iommu_group *grp; + struct iommu_table *tbl; + + /* Find an IOMMU table for the given ID */ + grp = iommu_group_get_by_id(args->iommu_id); + if (!grp) + return -ENXIO; + + tbl = iommu_group_get_iommudata(grp); + if (!tbl) + return -ENXIO; + + /* Check this LIOBN hasn't been previously allocated */ + list_for_each_entry(tt, &kvm->arch.spapr_tce_tables, list) { + if (tt->liobn == args->liobn) + return -EBUSY; + } + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) + return -ENOMEM; + + tt->liobn = args->liobn; + tt->kvm = kvm; + tt->virtmode_only = kvmppc_tce_virt_only; + tt->grp = grp; + + kvm_get_kvm(kvm); + + mutex_lock(&kvm->lock); + 
list_add(&tt->list, &kvm->arch.spapr_tce_tables); + + mutex_unlock(&kvm->lock); + + pr_debug("LIOBN=%llX hooked to IOMMU %d, flags=%u\n", + args->liobn, args->iommu_id, args->flags); + + return anon_inode_getfd("kvm-spapr-tce-iommu", + &kvm_spapr_tce_iommu_fops, tt, O_RDWR); +} +#else +long kvm_vm_ioctl_create_spapr_tce_iommu(struct kvm *kvm, + struct kvm_create_spapr_tce_iommu *args) +{ + return -ENOSYS; +} +#endif /* CONFIG_IOMMU_API */ + +#ifdef CONFIG_IOMMU_API /* * Virtual mode handling of IOMMU map/unmap. */ +static int clear_tce_virt_mode(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce_value, + unsigned long npages) +{ + int ret; + unsigned long entry = ioba >> IOMMU_PAGE_SHIFT; + + ret = iommu_tce_clear_param_check(tbl, ioba, tce_value, npages); + if (ret) + return ret; + + ret = iommu_clear_tces_and_put_pages(tbl, entry, npages); + if (ret < 0) + pr_err("iommu_tce: %s failed ioba=%lx, tce_value=%lx ret=%d\n", + __func__, ioba, tce_value, ret); + + return ret; +} + +static int put_tce_virt_mode(struct kvmppc_spapr_tce_table *tt, + struct iommu_table *tbl, + unsigned long ioba, unsigned long tce, + pte_t pte, unsigned long pg_size) +{ + int ret; + unsigned long entry = ioba >> IOMMU_PAGE_SHIFT; + + ret = iommu_tce_put_param_check(tbl, ioba, tce); + if (ret) + return ret; + + /* System page size case, easy to handle */ + if (pg_size == PAGE_SIZE) + return iommu_put_tce_user_mode(tbl, entry, tce); + + return -EAGAIN; +} + +static pte_t va_to_linux_pte(struct kvm_vcpu *vcpu, + unsigned long hva, bool writing, unsigned long *pg_sizep) +{ +#ifdef CONFIG_KVM_BOOK3S_64_HV + /* Find out the page pte and size if requested */ + pte_t pte; + unsigned long pg_size = 0; + + pte = lookup_linux_pte(vcpu->arch.pgdir, hva, + writing, &pg_size); + if (!pte_present(pte)) + return 0; + + *pg_sizep = pg_size; + + return pte; +#else + return 0; +#endif +} +#endif /* CONFIG_IOMMU_API */ + /* Converts guest physical address into host virtual */ static unsigned long 
get_virt_address(struct kvm_vcpu *vcpu, unsigned long gpa) @@ -188,6 +323,43 @@ long kvmppc_virtmode_h_put_tce(struct kvm_vcpu *vcpu, if (!tt) return H_TOO_HARD; +#ifdef CONFIG_IOMMU_API + if (tt->grp) { + long ret; + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); + + /* Return error if the group is being destroyed */ + if (!tbl) + return H_RESCINDED; + + if (tce & (TCE_PCI_READ | TCE_PCI_WRITE)) { + unsigned long hpa, pg_size = 0; + pte_t pte; + + hpa = get_virt_address(vcpu, tce); + if (hpa == ERROR_ADDR) + return -EFAULT; + + pte = va_to_linux_pte(vcpu, hpa, tce & TCE_PCI_WRITE, + &pg_size); + if (!pte) + return -EFAULT; + + ret = put_tce_virt_mode(tt, tbl, ioba, hpa, + pte, pg_size); + } else { + ret = clear_tce_virt_mode(tbl, ioba, 0, 1); + } + iommu_flush_tce(tbl); + + WARN_ON(ret == -EAGAIN); + if (ret < 0) + return H_PARAMETER; + + return H_SUCCESS; + } +#endif + /* Emulated IO */ return kvmppc_emulated_h_put_tce(tt, ioba, tce); } @@ -213,6 +385,52 @@ long kvmppc_virtmode_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (tces == ERROR_ADDR) return H_TOO_HARD; +#ifdef CONFIG_IOMMU_API + if (tt->grp) { + long ret = 0; + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); + + /* Return error if the group is being destroyed */ + if (!tbl) + return H_RESCINDED; + + for (i = 0; i < npages; ++i) { + unsigned long hpa, pg_size = 0; + pte_t pte = 0; + unsigned long tce; + unsigned long ptce = tces + i * sizeof(unsigned long); + + if (get_user(tce, (unsigned long __user *)ptce)) + break; + + hpa = get_virt_address(vcpu, tce); + if (hpa == ERROR_ADDR) + return -EFAULT; + + pte = va_to_linux_pte(vcpu, hpa, + tce & TCE_PCI_WRITE, &pg_size); + if (!pte) + return -EFAULT; + + ret = put_tce_virt_mode(tt, tbl, + ioba + (i << IOMMU_PAGE_SHIFT), + hpa, pte, pg_size); + if (ret) + break; + } + if (ret) + clear_tce_virt_mode(tbl, ioba, 0, i); + + iommu_flush_tce(tbl); + + WARN_ON(ret == -EAGAIN); + if (ret < 0) + return H_PARAMETER; + + return H_SUCCESS; + 
} +#endif + /* Emulated IO */ if ((ioba + (npages << IOMMU_PAGE_SHIFT)) > tt->window_size) return H_PARAMETER; @@ -253,6 +471,26 @@ long kvmppc_virtmode_h_stuff_tce(struct kvm_vcpu *vcpu, if (!tt) return H_TOO_HARD; +#ifdef CONFIG_IOMMU_API + if (tt->grp) { + long ret; + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); + + /* Return error if the group is being destroyed */ + if (!tbl) + return H_RESCINDED; + + ret = clear_tce_virt_mode(tbl, ioba, + tce_value, npages); + + WARN_ON(ret == -EAGAIN); + if (ret < 0) + return H_PARAMETER; + + return H_SUCCESS; + } +#endif + /* Emulated IO */ if ((ioba + (npages << IOMMU_PAGE_SHIFT)) > tt->window_size) return H_PARAMETER; diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 55fdf7a..c5e5905 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/hugetlb.h> #include <linux/list.h> +#include <linux/iommu.h> #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> @@ -161,6 +162,85 @@ static unsigned long get_real_address(struct kvm_vcpu *vcpu, return hwaddr; } +#ifdef CONFIG_IOMMU_API +static int clear_tce_real_mode(struct iommu_table *tbl, + unsigned long ioba, + unsigned long tce_value, unsigned long npages) +{ + int ret; + unsigned long entry = ioba >> IOMMU_PAGE_SHIFT; + + ret = iommu_tce_clear_param_check(tbl, ioba, tce_value, npages); + if (ret) + return ret; + + for ( ; npages; --npages, ++entry) { + struct page *page; + unsigned long oldtce; + + oldtce = iommu_clear_tce(tbl, entry); + if (!oldtce) + continue; + + page = realmode_pfn_to_page(oldtce >> PAGE_SHIFT); + if (!page) { + ret = -EAGAIN; + break; + } + + if (oldtce & TCE_PCI_WRITE) + SetPageDirty(page); + + ret = realmode_put_page(page); + if (ret) + break; + } + /* if (ret < 0) + pr_err("iommu_tce: %s failed ioba=%lx, tce_value=%lx ret=%d\n", + __func__, ioba, tce_value, ret); */ + + return ret; +} + +static int 
put_tce_real_mode(struct kvmppc_spapr_tce_table *tt, + struct iommu_table *tbl, + unsigned long ioba, unsigned long tce, + pte_t pte, unsigned long pg_size) +{ + int ret; + unsigned long entry = ioba >> IOMMU_PAGE_SHIFT; + struct page *page = NULL; + enum dma_data_direction direction = iommu_tce_direction(tce); + + ret = iommu_tce_put_param_check(tbl, ioba, tce); + if (ret) + return ret; + + if (pg_size != PAGE_SIZE) + return -EAGAIN; + + /* Small page case, find page struct to increment a counter */ + page = realmode_pfn_to_page(tce >> PAGE_SHIFT); + if (!page) + return -EAGAIN; + + ret = realmode_get_page(page); + if (ret) + return ret; + + /* tce_build accepts virtual addresses */ + ret = iommu_tce_build(tbl, entry, (unsigned long) __va(tce), direction); + if (ret) + realmode_put_page(page); + + /* if (ret < 0) + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", + __func__, ioba, tce, ret); */ + + return ret; +} +#endif /* CONFIG_IOMMU_API */ + long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { @@ -171,6 +251,44 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, if (!tt) return H_TOO_HARD; + if (tt->virtmode_only) + return H_TOO_HARD; + +#ifdef CONFIG_IOMMU_API + if (tt->grp) { + long ret; + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); + + /* Return error if the group is being destroyed */ + if (!tbl) + return H_RESCINDED; + + if (tce & (TCE_PCI_READ | TCE_PCI_WRITE)) { + unsigned long hpa, pg_size = 0; + pte_t pte = 0; + + hpa = get_real_address(vcpu, tce, tce & TCE_PCI_WRITE, + &pte, &pg_size); + if (hpa == ERROR_ADDR) + return H_TOO_HARD; + + ret = put_tce_real_mode(tt, tbl, ioba, + hpa, pte, pg_size); + } else { + ret = clear_tce_real_mode(tbl, ioba, 0, 1); + } + iommu_flush_tce(tbl); + + if (ret == -EAGAIN) + return H_TOO_HARD; + + if (ret < 0) + return H_PARAMETER; + + return H_SUCCESS; + } +#endif + /* Emulated IO */ return kvmppc_emulated_h_put_tce(tt, 
ioba, tce); } @@ -192,10 +310,58 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (!tt) return H_TOO_HARD; + if (tt->virtmode_only) + return H_TOO_HARD; + tces = get_real_address(vcpu, tce_list, false, NULL, NULL); if (tces == ERROR_ADDR) return H_TOO_HARD; +#ifdef CONFIG_IOMMU_API + if (tt->grp) { + long ret = 0; + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); + + /* Return error if the group is being destroyed */ + if (!tbl) + return H_RESCINDED; + + for (i = 0; i < npages; ++i) { + unsigned long hpa, pg_size = 0; + pte_t pte = 0; + unsigned long tce; + unsigned long ptce = tces + i * sizeof(unsigned long); + + if (get_user(tce, (unsigned long __user *)ptce)) + break; + + hpa = get_real_address(vcpu, tce, + tce & TCE_PCI_WRITE, + &pte, &pg_size); + if (hpa == ERROR_ADDR) + ret = -EAGAIN; + else + ret = put_tce_real_mode(tt, tbl, + ioba + (i << IOMMU_PAGE_SHIFT), + hpa, pte, pg_size); + if (ret) + break; + } + if (ret) + clear_tce_real_mode(tbl, ioba, 0, i); + + iommu_flush_tce(tbl); + + if (ret == -EAGAIN) + return H_TOO_HARD; + + if (ret < 0) + return H_PARAMETER; + + return H_SUCCESS; + } +#endif + /* Emulated IO */ if ((ioba + (npages << IOMMU_PAGE_SHIFT)) > tt->window_size) return H_PARAMETER; @@ -236,6 +402,32 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, if (!tt) return H_TOO_HARD; + if (tt->virtmode_only) + return H_TOO_HARD; + +#ifdef CONFIG_IOMMU_API + if (tt->grp) { + long ret; + struct iommu_table *tbl = iommu_group_get_iommudata(tt->grp); + + /* Return error if the group is being destroyed */ + if (!tbl) + return H_RESCINDED; + + ret = clear_tce_real_mode(tbl, ioba, + tce_value, npages); + iommu_flush_tce(tbl); + + if (ret == -EAGAIN) + return H_TOO_HARD; + + if (ret < 0) + return H_PARAMETER; + + return H_SUCCESS; + } +#endif + /* Emulated IO */ if ((ioba + (npages << IOMMU_PAGE_SHIFT)) > tt->window_size) return H_PARAMETER; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b7ad589..269b0f6 
100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -385,6 +385,7 @@ int kvm_dev_ioctl_check_extension(long ext) break; #endif case KVM_CAP_SPAPR_MULTITCE: + case KVM_CAP_SPAPR_TCE_IOMMU: r = 1; break; default: @@ -935,6 +936,17 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); goto out; } + case KVM_CREATE_SPAPR_TCE_IOMMU: { + struct kvm_create_spapr_tce_iommu create_tce_iommu; + struct kvm *kvm = filp->private_data; + + r = -EFAULT; + if (copy_from_user(&create_tce_iommu, argp, + sizeof(create_tce_iommu))) + goto out; + r = kvm_vm_ioctl_create_spapr_tce_iommu(kvm, &create_tce_iommu); + goto out; + } #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_KVM_BOOK3S_64_HV diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6c04da1..161e1d3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -641,6 +641,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_RTAS (0x100000 + 87) #define KVM_CAP_SPAPR_XICS (0x100000 + 88) #define KVM_CAP_SPAPR_MULTITCE (0x110000 + 89) +#define KVM_CAP_SPAPR_TCE_IOMMU (0x110000 + 90) #ifdef KVM_CAP_IRQ_ROUTING @@ -885,6 +886,7 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) /* Available with KVM_CAP_PPC_RTAS */ #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xdc, struct kvm_rtas_token_args) +#define KVM_CREATE_SPAPR_TCE_IOMMU _IOW(KVMIO, 0xaf, struct kvm_create_spapr_tce_iommu) /* * ioctls for vcpu fds