Message ID | 1269497376-21903-1-git-send-email-cam@cs.ualberta.ca |
---|---|
State | New |
Headers | show |
On Thu, Mar 25, 2010 at 12:09:36AM -0600, Cam Macdonell wrote: > This patch adds a driver for my shared memory PCI device using the uio_pci > interface. The driver has three memory regions. The first memory region is for > device registers for sending interrupts. The second BAR is for receiving MSI-X > interrupts and the third memory region maps the shared memory. The device only > exports the first and third memory regions to userspace. > > This driver supports MSI-X and regular pin interrupts. Currently, the number of > MSI vectors is set to 4 which could be increased, but the driver will work with > fewer vectors. If MSI is not available, then regular interrupts will be used. > --- > drivers/uio/Kconfig | 8 ++ > drivers/uio/Makefile | 1 + > drivers/uio/uio_ivshmem.c | 235 +++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 244 insertions(+), 0 deletions(-) > create mode 100644 drivers/uio/uio_ivshmem.c > > diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig > index 1da73ec..b92cded 100644 > --- a/drivers/uio/Kconfig > +++ b/drivers/uio/Kconfig > @@ -74,6 +74,14 @@ config UIO_SERCOS3 > > If you compile this as a module, it will be called uio_sercos3. > > +config UIO_IVSHMEM > + tristate "KVM shared memory PCI driver" > + default n > + help > + Userspace I/O interface for the KVM shared memory device. This > + driver will make available two memory regions, the first is > + registers and the second is a region for sharing between VMs. > + > config UIO_PCI_GENERIC > tristate "Generic driver for PCI 2.3 and PCI Express cards" > depends on PCI > diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile > index 18fd818..25c1ca5 100644 > --- a/drivers/uio/Makefile > +++ b/drivers/uio/Makefile > @@ -6,3 +6,4 @@ obj-$(CONFIG_UIO_AEC) += uio_aec.o > obj-$(CONFIG_UIO_SERCOS3) += uio_sercos3.o > obj-$(CONFIG_UIO_PCI_GENERIC) += uio_pci_generic.o > obj-$(CONFIG_UIO_NETX) += uio_netx.o > +obj-$(CONFIG_UIO_IVSHMEM) += uio_ivshmem.o > diff --git a/drivers/uio/uio_ivshmem.c b/drivers/uio/uio_ivshmem.c > new file mode 100644 > index 0000000..607435b > --- /dev/null > +++ b/drivers/uio/uio_ivshmem.c > @@ -0,0 +1,235 @@ > +/* > + * UIO IVShmem Driver > + * > + * (C) 2009 Cam Macdonell > + * based on Hilscher CIF card driver (C) 2007 Hans J. Koch <hjk@linutronix.de> > + * > + * Licensed under GPL version 2 only. > + * > + */ > + > +#include <linux/device.h> > +#include <linux/module.h> > +#include <linux/pci.h> > +#include <linux/uio_driver.h> > + > +#include <asm/io.h> > + > +#define IntrStatus 0x04 > +#define IntrMask 0x00 > + > +struct ivshmem_info { > + struct uio_info *uio; > + struct pci_dev *dev; > + char (*msix_names)[256]; > + struct msix_entry *msix_entries; > + int nvectors; > +}; > + > +static irqreturn_t ivshmem_handler(int irq, struct uio_info *dev_info) > +{ > + > + void __iomem *plx_intscr = dev_info->mem[0].internal_addr > + + IntrStatus; > + u32 val; > + > + val = readl(plx_intscr); > + if (val == 0) > + return IRQ_NONE; > + > + printk(KERN_INFO "Regular interrupt (val = %d)\n", val); > + return IRQ_HANDLED; > +} > + > +static irqreturn_t ivshmem_msix_handler(int irq, void *opaque) > +{ > + > + struct uio_info * dev_info = (struct uio_info *) opaque; > + > + /* we have to do this explicitly when using MSI-X */ > + uio_event_notify(dev_info); How does userspace know which vector was triggered? > + printk(KERN_INFO "MSI-X interrupt (%d)\n", irq); > + return IRQ_HANDLED; > + extra empty line > +} > + > +static int request_msix_vectors(struct ivshmem_info *ivs_info, int nvectors) > +{ > + int i, err; > + const char *name = "ivshmem"; > + > + printk(KERN_INFO "devname is %s\n", name); These KERN_INFO messages need to be cleaned up, they would be look confusing in the log. > + ivs_info->nvectors = nvectors; > + > + > + ivs_info->msix_entries = kmalloc(nvectors * sizeof *ivs_info->msix_entries, > + GFP_KERNEL); > + ivs_info->msix_names = kmalloc(nvectors * sizeof *ivs_info->msix_names, > + GFP_KERNEL); > + need to handle errors > + for (i = 0; i < nvectors; ++i) > + ivs_info->msix_entries[i].entry = i; > + > + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, > + ivs_info->nvectors); > + if (err > 0) { > + ivs_info->nvectors = err; /* msi-x positive error code > + returns the number available*/ > + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, > + ivs_info->nvectors); > + if (err > 0) { > + printk(KERN_INFO "no MSI (%d). Back to INTx.\n", err); > + return -ENOSPC; > + } > + } we can also get err < 0. > + > + printk(KERN_INFO "err is %d\n", err); > + if (err) return err; coding style rule violation > + > + for (i = 0; i < ivs_info->nvectors; i++) { > + > + snprintf(ivs_info->msix_names[i], sizeof *ivs_info->msix_names, > + "%s-config", name); > + > + ivs_info->msix_entries[i].entry = i; > + err = request_irq(ivs_info->msix_entries[i].vector, > + ivshmem_msix_handler, 0, > + ivs_info->msix_names[i], ivs_info->uio); > + > + if (err) { > + return -ENOSPC; coding style rule violation no undo on error handling why override error code with -ENOSPC? > + } > + } > + > + return 0; > +} > + > +static int __devinit ivshmem_pci_probe(struct pci_dev *dev, > + const struct pci_device_id *id) > +{ > + struct uio_info *info; > + struct ivshmem_info * ivshmem_info; > + int nvectors = 4; > + > + info = kzalloc(sizeof(struct uio_info), GFP_KERNEL); > + if (!info) > + return -ENOMEM; > + > + ivshmem_info = kzalloc(sizeof(struct ivshmem_info), GFP_KERNEL); > + if (!ivshmem_info) { > + kfree(info); > + return -ENOMEM; > + } > + > + if (pci_enable_device(dev)) > + goto out_free; > + > + if (pci_request_regions(dev, "ivshmem")) > + goto out_disable; > + > + info->mem[0].addr = pci_resource_start(dev, 0); > + if (!info->mem[0].addr) > + goto out_release; > + > + info->mem[0].size = pci_resource_len(dev, 0); > + info->mem[0].internal_addr = pci_ioremap_bar(dev, 0); > + if (!info->mem[0].internal_addr) { > + printk(KERN_INFO "got a null"); > + goto out_release; > + } > + > + info->mem[0].memtype = UIO_MEM_PHYS; > + > + info->mem[1].addr = pci_resource_start(dev, 2); > + if (!info->mem[1].addr) > + goto out_unmap; > + info->mem[1].internal_addr = pci_ioremap_bar(dev, 2); > + if (!info->mem[1].internal_addr) > + goto out_unmap; > + > + info->mem[1].size = pci_resource_len(dev, 2); > + info->mem[1].memtype = UIO_MEM_PHYS; > + > + ivshmem_info->uio = info; > + ivshmem_info->dev = dev; > + > + if (request_msix_vectors(ivshmem_info, nvectors) != 0) { > + printk(KERN_INFO "regular IRQs\n"); > + info->irq = dev->irq; > + info->irq_flags = IRQF_SHARED; > + info->handler = ivshmem_handler; > + writel(0xffffffff, info->mem[0].internal_addr + IntrMask); > + } else { > + printk(KERN_INFO "MSI-X enabled\n"); > + info->irq = -1; > + } > + > + info->name = "ivshmem"; > + info->version = "0.0.1"; > + > + if (uio_register_device(&dev->dev, info)) > + goto out_unmap2; > + > + pci_set_drvdata(dev, info); > + > + > + return 0; > +out_unmap2: > + iounmap(info->mem[2].internal_addr); > +out_unmap: > + iounmap(info->mem[0].internal_addr); > +out_release: > + pci_release_regions(dev); > +out_disable: > + pci_disable_device(dev); > +out_free: > + kfree (info); > + return -ENODEV; > +} > + > +static void ivshmem_pci_remove(struct pci_dev *dev) > +{ > + struct uio_info *info = pci_get_drvdata(dev); > + > + uio_unregister_device(info); > + pci_release_regions(dev); > + pci_disable_device(dev); > + pci_set_drvdata(dev, NULL); > + iounmap(info->mem[0].internal_addr); > + > + kfree (info); > +} > + > +static struct pci_device_id ivshmem_pci_ids[] __devinitdata = { > + { > + .vendor = 0x1af4, > + .device = 0x1110, vendor ids must be registered with PCI SIG. this one does not seem to be registered. > + .subvendor = PCI_ANY_ID, > + .subdevice = PCI_ANY_ID, > + }, > + { 0, } > +}; > + > +static struct pci_driver ivshmem_pci_driver = { > + .name = "uio_ivshmem", > + .id_table = ivshmem_pci_ids, > + .probe = ivshmem_pci_probe, > + .remove = ivshmem_pci_remove, > +}; > + > +static int __init ivshmem_init_module(void) > +{ > + return pci_register_driver(&ivshmem_pci_driver); > +} > + > +static void __exit ivshmem_exit_module(void) > +{ > + pci_unregister_driver(&ivshmem_pci_driver); > +} > + > +module_init(ivshmem_init_module); > +module_exit(ivshmem_exit_module); > + > +MODULE_DEVICE_TABLE(pci, ivshmem_pci_ids); > +MODULE_LICENSE("GPL v2"); > +MODULE_AUTHOR("Cam Macdonell"); > -- > 1.6.6.1 > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Mar 25, 2010 at 12:09:36AM -0600, Cam Macdonell wrote: > This patch adds a driver for my shared memory PCI device using the uio_pci > interface. The driver has three memory regions. The first memory region is for > device registers for sending interrupts. The second BAR is for receiving MSI-X > interrupts and the third memory region maps the shared memory. The device only > exports the first and third memory regions to userspace. > > This driver supports MSI-X and regular pin interrupts. Currently, the number of > MSI vectors is set to 4 which could be increased, but the driver will work with > fewer vectors. If MSI is not available, then regular interrupts will be used. Some high level questions, sorry if they have been raised in the past: - Can this device use virtio framework? This gives us some standards to work off, with feature negotiation, ability to detect config changes, support for non-PCI platforms, decent documentation that is easy to extend, legal id range to use. You would thus have your driver in uio/uio_virtio_shmem.c - Why are you using 32 bit long memory accesses for interrupts? 16 bit IO eould be enough and it's faster. This what virtio-pci does. - How was the driver tested? > --- > drivers/uio/Kconfig | 8 ++ > drivers/uio/Makefile | 1 + > drivers/uio/uio_ivshmem.c | 235 +++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 244 insertions(+), 0 deletions(-) > create mode 100644 drivers/uio/uio_ivshmem.c > > diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig > index 1da73ec..b92cded 100644 > --- a/drivers/uio/Kconfig > +++ b/drivers/uio/Kconfig > @@ -74,6 +74,14 @@ config UIO_SERCOS3 > > If you compile this as a module, it will be called uio_sercos3. > > +config UIO_IVSHMEM > + tristate "KVM shared memory PCI driver" > + default n > + help > + Userspace I/O interface for the KVM shared memory device. This > + driver will make available two memory regions, the first is > + registers and the second is a region for sharing between VMs. > + > config UIO_PCI_GENERIC > tristate "Generic driver for PCI 2.3 and PCI Express cards" > depends on PCI > diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile > index 18fd818..25c1ca5 100644 > --- a/drivers/uio/Makefile > +++ b/drivers/uio/Makefile > @@ -6,3 +6,4 @@ obj-$(CONFIG_UIO_AEC) += uio_aec.o > obj-$(CONFIG_UIO_SERCOS3) += uio_sercos3.o > obj-$(CONFIG_UIO_PCI_GENERIC) += uio_pci_generic.o > obj-$(CONFIG_UIO_NETX) += uio_netx.o > +obj-$(CONFIG_UIO_IVSHMEM) += uio_ivshmem.o > diff --git a/drivers/uio/uio_ivshmem.c b/drivers/uio/uio_ivshmem.c > new file mode 100644 > index 0000000..607435b > --- /dev/null > +++ b/drivers/uio/uio_ivshmem.c > @@ -0,0 +1,235 @@ > +/* > + * UIO IVShmem Driver > + * > + * (C) 2009 Cam Macdonell > + * based on Hilscher CIF card driver (C) 2007 Hans J. Koch <hjk@linutronix.de> > + * > + * Licensed under GPL version 2 only. > + * > + */ > + > +#include <linux/device.h> > +#include <linux/module.h> > +#include <linux/pci.h> > +#include <linux/uio_driver.h> > + > +#include <asm/io.h> > + > +#define IntrStatus 0x04 > +#define IntrMask 0x00 > + > +struct ivshmem_info { > + struct uio_info *uio; > + struct pci_dev *dev; > + char (*msix_names)[256]; > + struct msix_entry *msix_entries; > + int nvectors; > +}; > + > +static irqreturn_t ivshmem_handler(int irq, struct uio_info *dev_info) > +{ > + > + void __iomem *plx_intscr = dev_info->mem[0].internal_addr > + + IntrStatus; > + u32 val; > + > + val = readl(plx_intscr); > + if (val == 0) > + return IRQ_NONE; > + > + printk(KERN_INFO "Regular interrupt (val = %d)\n", val); > + return IRQ_HANDLED; > +} > + > +static irqreturn_t ivshmem_msix_handler(int irq, void *opaque) > +{ > + > + struct uio_info * dev_info = (struct uio_info *) opaque; > + > + /* we have to do this explicitly when using MSI-X */ > + uio_event_notify(dev_info); > + printk(KERN_INFO "MSI-X interrupt (%d)\n", irq); > + return IRQ_HANDLED; > + > +} > + > +static int request_msix_vectors(struct ivshmem_info *ivs_info, int nvectors) > +{ > + int i, err; > + const char *name = "ivshmem"; > + > + printk(KERN_INFO "devname is %s\n", name); > + ivs_info->nvectors = nvectors; > + > + > + ivs_info->msix_entries = kmalloc(nvectors * sizeof *ivs_info->msix_entries, > + GFP_KERNEL); > + ivs_info->msix_names = kmalloc(nvectors * sizeof *ivs_info->msix_names, > + GFP_KERNEL); > + > + for (i = 0; i < nvectors; ++i) > + ivs_info->msix_entries[i].entry = i; > + > + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, > + ivs_info->nvectors); > + if (err > 0) { > + ivs_info->nvectors = err; /* msi-x positive error code > + returns the number available*/ > + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, > + ivs_info->nvectors); > + if (err > 0) { > + printk(KERN_INFO "no MSI (%d). Back to INTx.\n", err); > + return -ENOSPC; > + } > + } > + > + printk(KERN_INFO "err is %d\n", err); > + if (err) return err; > + > + for (i = 0; i < ivs_info->nvectors; i++) { > + > + snprintf(ivs_info->msix_names[i], sizeof *ivs_info->msix_names, > + "%s-config", name); > + > + ivs_info->msix_entries[i].entry = i; > + err = request_irq(ivs_info->msix_entries[i].vector, > + ivshmem_msix_handler, 0, > + ivs_info->msix_names[i], ivs_info->uio); > + > + if (err) { > + return -ENOSPC; > + } > + } > + > + return 0; > +} > + > +static int __devinit ivshmem_pci_probe(struct pci_dev *dev, > + const struct pci_device_id *id) > +{ > + struct uio_info *info; > + struct ivshmem_info * ivshmem_info; > + int nvectors = 4; > + > + info = kzalloc(sizeof(struct uio_info), GFP_KERNEL); > + if (!info) > + return -ENOMEM; > + > + ivshmem_info = kzalloc(sizeof(struct ivshmem_info), GFP_KERNEL); > + if (!ivshmem_info) { > + kfree(info); > + return -ENOMEM; > + } > + > + if (pci_enable_device(dev)) > + goto out_free; > + > + if (pci_request_regions(dev, "ivshmem")) > + goto out_disable; > + > + info->mem[0].addr = pci_resource_start(dev, 0); > + if (!info->mem[0].addr) > + goto out_release; > + > + info->mem[0].size = pci_resource_len(dev, 0); > + info->mem[0].internal_addr = pci_ioremap_bar(dev, 0); > + if (!info->mem[0].internal_addr) { > + printk(KERN_INFO "got a null"); > + goto out_release; > + } > + > + info->mem[0].memtype = UIO_MEM_PHYS; > + > + info->mem[1].addr = pci_resource_start(dev, 2); > + if (!info->mem[1].addr) > + goto out_unmap; > + info->mem[1].internal_addr = pci_ioremap_bar(dev, 2); > + if (!info->mem[1].internal_addr) > + goto out_unmap; > + > + info->mem[1].size = pci_resource_len(dev, 2); > + info->mem[1].memtype = UIO_MEM_PHYS; > + > + ivshmem_info->uio = info; > + ivshmem_info->dev = dev; > + > + if (request_msix_vectors(ivshmem_info, nvectors) != 0) { > + printk(KERN_INFO "regular IRQs\n"); > + info->irq = dev->irq; > + info->irq_flags = IRQF_SHARED; > + info->handler = ivshmem_handler; > + writel(0xffffffff, info->mem[0].internal_addr + IntrMask); > + } else { > + printk(KERN_INFO "MSI-X enabled\n"); > + info->irq = -1; > + } > + > + info->name = "ivshmem"; > + info->version = "0.0.1"; > + > + if (uio_register_device(&dev->dev, info)) > + goto out_unmap2; > + > + pci_set_drvdata(dev, info); > + > + > + return 0; > +out_unmap2: > + iounmap(info->mem[2].internal_addr); > +out_unmap: > + iounmap(info->mem[0].internal_addr); > +out_release: > + pci_release_regions(dev); > +out_disable: > + pci_disable_device(dev); > +out_free: > + kfree (info); > + return -ENODEV; > +} > + > +static void ivshmem_pci_remove(struct pci_dev *dev) > +{ > + struct uio_info *info = pci_get_drvdata(dev); > + > + uio_unregister_device(info); > + pci_release_regions(dev); > + pci_disable_device(dev); > + pci_set_drvdata(dev, NULL); > + iounmap(info->mem[0].internal_addr); > + > + kfree (info); > +} > + > +static struct pci_device_id ivshmem_pci_ids[] __devinitdata = { > + { > + .vendor = 0x1af4, > + .device = 0x1110, > + .subvendor = PCI_ANY_ID, > + .subdevice = PCI_ANY_ID, > + }, > + { 0, } > +}; > + > +static struct pci_driver ivshmem_pci_driver = { > + .name = "uio_ivshmem", > + .id_table = ivshmem_pci_ids, > + .probe = ivshmem_pci_probe, > + .remove = ivshmem_pci_remove, > +}; > + > +static int __init ivshmem_init_module(void) > +{ > + return pci_register_driver(&ivshmem_pci_driver); > +} > + > +static void __exit ivshmem_exit_module(void) > +{ > + pci_unregister_driver(&ivshmem_pci_driver); > +} > + > +module_init(ivshmem_init_module); > +module_exit(ivshmem_exit_module); > + > +MODULE_DEVICE_TABLE(pci, ivshmem_pci_ids); > +MODULE_LICENSE("GPL v2"); > +MODULE_AUTHOR("Cam Macdonell"); > -- > 1.6.6.1 > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/25/2010 11:15 AM, Michael S. Tsirkin wrote: > > - Why are you using 32 bit long memory accesses for interrupts? > 16 bit IO eould be enough and it's faster. This what virtio-pci does. > > Why is 16 bit io faster?
On Thu, Mar 25, 2010 at 11:40:09AM +0200, Avi Kivity wrote: > On 03/25/2010 11:15 AM, Michael S. Tsirkin wrote: >> >> - Why are you using 32 bit long memory accesses for interrupts? >> 16 bit IO eould be enough and it's faster. This what virtio-pci does. >> >> > > Why is 16 bit io faster? Something to do with need for emulation to get address/data for pci memory accesses? > -- > error compiling committee.c: too many arguments to function
On 03/25/2010 08:09 AM, Cam Macdonell wrote: > This patch adds a driver for my shared memory PCI device using the uio_pci > interface. The driver has three memory regions. The first memory region is for > device registers for sending interrupts. The second BAR is for receiving MSI-X > interrupts and the third memory region maps the shared memory. The device only > exports the first and third memory regions to userspace. > > This driver supports MSI-X and regular pin interrupts. Currently, the number of > MSI vectors is set to 4 which could be increased, but the driver will work with > fewer vectors. If MSI is not available, then regular interrupts will be used. > There is now a generic PCI 2.3 driver that can handle all PCI devices. It doesn't support MSI, but if we add MSI support then it can be used without the need for a specialized driver.
On 03/25/2010 11:05 AM, Michael S. Tsirkin wrote: > coding style rule violation > Plus it uses spaces instead of tabs for indents.
On 03/25/2010 11:44 AM, Michael S. Tsirkin wrote: > On Thu, Mar 25, 2010 at 11:40:09AM +0200, Avi Kivity wrote: > >> On 03/25/2010 11:15 AM, Michael S. Tsirkin wrote: >> >>> - Why are you using 32 bit long memory accesses for interrupts? >>> 16 bit IO eould be enough and it's faster. This what virtio-pci does. >>> >>> >>> >> Why is 16 bit io faster? >> > Something to do with need for emulation to get address/data > for pci memory accesses? > pio is definitely faster than mmio (all that is needed is to set one bit on the BAR). But 32 vs. 16 makes no difference.
On Thu, Mar 25, 2010 at 11:58:30AM +0200, Avi Kivity wrote: > On 03/25/2010 11:44 AM, Michael S. Tsirkin wrote: >> On Thu, Mar 25, 2010 at 11:40:09AM +0200, Avi Kivity wrote: >> >>> On 03/25/2010 11:15 AM, Michael S. Tsirkin wrote: >>> >>>> - Why are you using 32 bit long memory accesses for interrupts? >>>> 16 bit IO eould be enough and it's faster. This what virtio-pci does. >>>> >>>> >>>> >>> Why is 16 bit io faster? >>> >> Something to do with need for emulation to get address/data >> for pci memory accesses? >> > > pio is definitely faster than mmio (all that is needed is to set one bit > on the BAR). But 32 vs. 16 makes no difference. Right. That's what I meant. > -- > error compiling committee.c: too many arguments to function
On Thu, Mar 25, 2010 at 3:15 AM, Michael S. Tsirkin <mst@redhat.com> wrote: > On Thu, Mar 25, 2010 at 12:09:36AM -0600, Cam Macdonell wrote: >> This patch adds a driver for my shared memory PCI device using the uio_pci >> interface. The driver has three memory regions. The first memory region is for >> device registers for sending interrupts. The second BAR is for receiving MSI-X >> interrupts and the third memory region maps the shared memory. The device only >> exports the first and third memory regions to userspace. >> >> This driver supports MSI-X and regular pin interrupts. Currently, the number of >> MSI vectors is set to 4 which could be increased, but the driver will work with >> fewer vectors. If MSI is not available, then regular interrupts will be used. > > Some high level questions, sorry if they have been raised in the past: > - Can this device use virtio framework? > This gives us some standards to work off, with feature negotiation, > ability to detect config changes, support for non-PCI > platforms, decent documentation that is easy to extend, > legal id range to use. > You would thus have your driver in uio/uio_virtio_shmem.c There has been previous discussion of virtio, however while virtio is good for exporting guest memory, it's not ideal for importing memory into a guest. > > - Why are you using 32 bit long memory accesses for interrupts? > 16 bit IO eould be enough and it's faster. This what virtio-pci does. > > - How was the driver tested? I have some test programs in the git repo I linked to. I've been using some simple producer/consumer tests to test the interrupt framework. > >> --- >> drivers/uio/Kconfig | 8 ++ >> drivers/uio/Makefile | 1 + >> drivers/uio/uio_ivshmem.c | 235 +++++++++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 244 insertions(+), 0 deletions(-) >> create mode 100644 drivers/uio/uio_ivshmem.c >> >> diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig >> index 1da73ec..b92cded 100644 >> --- a/drivers/uio/Kconfig >> +++ b/drivers/uio/Kconfig >> @@ -74,6 +74,14 @@ config UIO_SERCOS3 >> >> If you compile this as a module, it will be called uio_sercos3. >> >> +config UIO_IVSHMEM >> + tristate "KVM shared memory PCI driver" >> + default n >> + help >> + Userspace I/O interface for the KVM shared memory device. This >> + driver will make available two memory regions, the first is >> + registers and the second is a region for sharing between VMs. >> + >> config UIO_PCI_GENERIC >> tristate "Generic driver for PCI 2.3 and PCI Express cards" >> depends on PCI >> diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile >> index 18fd818..25c1ca5 100644 >> --- a/drivers/uio/Makefile >> +++ b/drivers/uio/Makefile >> @@ -6,3 +6,4 @@ obj-$(CONFIG_UIO_AEC) += uio_aec.o >> obj-$(CONFIG_UIO_SERCOS3) += uio_sercos3.o >> obj-$(CONFIG_UIO_PCI_GENERIC) += uio_pci_generic.o >> obj-$(CONFIG_UIO_NETX) += uio_netx.o >> +obj-$(CONFIG_UIO_IVSHMEM) += uio_ivshmem.o >> diff --git a/drivers/uio/uio_ivshmem.c b/drivers/uio/uio_ivshmem.c >> new file mode 100644 >> index 0000000..607435b >> --- /dev/null >> +++ b/drivers/uio/uio_ivshmem.c >> @@ -0,0 +1,235 @@ >> +/* >> + * UIO IVShmem Driver >> + * >> + * (C) 2009 Cam Macdonell >> + * based on Hilscher CIF card driver (C) 2007 Hans J. Koch <hjk@linutronix.de> >> + * >> + * Licensed under GPL version 2 only. >> + * >> + */ >> + >> +#include <linux/device.h> >> +#include <linux/module.h> >> +#include <linux/pci.h> >> +#include <linux/uio_driver.h> >> + >> +#include <asm/io.h> >> + >> +#define IntrStatus 0x04 >> +#define IntrMask 0x00 >> + >> +struct ivshmem_info { >> + struct uio_info *uio; >> + struct pci_dev *dev; >> + char (*msix_names)[256]; >> + struct msix_entry *msix_entries; >> + int nvectors; >> +}; >> + >> +static irqreturn_t ivshmem_handler(int irq, struct uio_info *dev_info) >> +{ >> + >> + void __iomem *plx_intscr = dev_info->mem[0].internal_addr >> + + IntrStatus; >> + u32 val; >> + >> + val = readl(plx_intscr); >> + if (val == 0) >> + return IRQ_NONE; >> + >> + printk(KERN_INFO "Regular interrupt (val = %d)\n", val); >> + return IRQ_HANDLED; >> +} >> + >> +static irqreturn_t ivshmem_msix_handler(int irq, void *opaque) >> +{ >> + >> + struct uio_info * dev_info = (struct uio_info *) opaque; >> + >> + /* we have to do this explicitly when using MSI-X */ >> + uio_event_notify(dev_info); >> + printk(KERN_INFO "MSI-X interrupt (%d)\n", irq); >> + return IRQ_HANDLED; >> + >> +} >> + >> +static int request_msix_vectors(struct ivshmem_info *ivs_info, int nvectors) >> +{ >> + int i, err; >> + const char *name = "ivshmem"; >> + >> + printk(KERN_INFO "devname is %s\n", name); >> + ivs_info->nvectors = nvectors; >> + >> + >> + ivs_info->msix_entries = kmalloc(nvectors * sizeof *ivs_info->msix_entries, >> + GFP_KERNEL); >> + ivs_info->msix_names = kmalloc(nvectors * sizeof *ivs_info->msix_names, >> + GFP_KERNEL); >> + >> + for (i = 0; i < nvectors; ++i) >> + ivs_info->msix_entries[i].entry = i; >> + >> + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, >> + ivs_info->nvectors); >> + if (err > 0) { >> + ivs_info->nvectors = err; /* msi-x positive error code >> + returns the number available*/ >> + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, >> + ivs_info->nvectors); >> + if (err > 0) { >> + printk(KERN_INFO "no MSI (%d). Back to INTx.\n", err); >> + return -ENOSPC; >> + } >> + } >> + >> + printk(KERN_INFO "err is %d\n", err); >> + if (err) return err; >> + >> + for (i = 0; i < ivs_info->nvectors; i++) { >> + >> + snprintf(ivs_info->msix_names[i], sizeof *ivs_info->msix_names, >> + "%s-config", name); >> + >> + ivs_info->msix_entries[i].entry = i; >> + err = request_irq(ivs_info->msix_entries[i].vector, >> + ivshmem_msix_handler, 0, >> + ivs_info->msix_names[i], ivs_info->uio); >> + >> + if (err) { >> + return -ENOSPC; >> + } >> + } >> + >> + return 0; >> +} >> + >> +static int __devinit ivshmem_pci_probe(struct pci_dev *dev, >> + const struct pci_device_id *id) >> +{ >> + struct uio_info *info; >> + struct ivshmem_info * ivshmem_info; >> + int nvectors = 4; >> + >> + info = kzalloc(sizeof(struct uio_info), GFP_KERNEL); >> + if (!info) >> + return -ENOMEM; >> + >> + ivshmem_info = kzalloc(sizeof(struct ivshmem_info), GFP_KERNEL); >> + if (!ivshmem_info) { >> + kfree(info); >> + return -ENOMEM; >> + } >> + >> + if (pci_enable_device(dev)) >> + goto out_free; >> + >> + if (pci_request_regions(dev, "ivshmem")) >> + goto out_disable; >> + >> + info->mem[0].addr = pci_resource_start(dev, 0); >> + if (!info->mem[0].addr) >> + goto out_release; >> + >> + info->mem[0].size = pci_resource_len(dev, 0); >> + info->mem[0].internal_addr = pci_ioremap_bar(dev, 0); >> + if (!info->mem[0].internal_addr) { >> + printk(KERN_INFO "got a null"); >> + goto out_release; >> + } >> + >> + info->mem[0].memtype = UIO_MEM_PHYS; >> + >> + info->mem[1].addr = pci_resource_start(dev, 2); >> + if (!info->mem[1].addr) >> + goto out_unmap; >> + info->mem[1].internal_addr = pci_ioremap_bar(dev, 2); >> + if (!info->mem[1].internal_addr) >> + goto out_unmap; >> + >> + info->mem[1].size = pci_resource_len(dev, 2); >> + info->mem[1].memtype = UIO_MEM_PHYS; >> + >> + ivshmem_info->uio = info; >> + ivshmem_info->dev = dev; >> + >> + if (request_msix_vectors(ivshmem_info, nvectors) != 0) { >> + printk(KERN_INFO "regular IRQs\n"); >> + info->irq = dev->irq; >> + info->irq_flags = IRQF_SHARED; >> + info->handler = ivshmem_handler; >> + writel(0xffffffff, info->mem[0].internal_addr + IntrMask); >> + } else { >> + printk(KERN_INFO "MSI-X enabled\n"); >> + info->irq = -1; >> + } >> + >> + info->name = "ivshmem"; >> + info->version = "0.0.1"; >> + >> + if (uio_register_device(&dev->dev, info)) >> + goto out_unmap2; >> + >> + pci_set_drvdata(dev, info); >> + >> + >> + return 0; >> +out_unmap2: >> + iounmap(info->mem[2].internal_addr); >> +out_unmap: >> + iounmap(info->mem[0].internal_addr); >> +out_release: >> + pci_release_regions(dev); >> +out_disable: >> + pci_disable_device(dev); >> +out_free: >> + kfree (info); >> + return -ENODEV; >> +} >> + >> +static void ivshmem_pci_remove(struct pci_dev *dev) >> +{ >> + struct uio_info *info = pci_get_drvdata(dev); >> + >> + uio_unregister_device(info); >> + pci_release_regions(dev); >> + pci_disable_device(dev); >> + pci_set_drvdata(dev, NULL); >> + iounmap(info->mem[0].internal_addr); >> + >> + kfree (info); >> +} >> + >> +static struct pci_device_id ivshmem_pci_ids[] __devinitdata = { >> + { >> + .vendor = 0x1af4, >> + .device = 0x1110, >> + .subvendor = PCI_ANY_ID, >> + .subdevice = PCI_ANY_ID, >> + }, >> + { 0, } >> +}; >> + >> +static struct pci_driver ivshmem_pci_driver = { >> + .name = "uio_ivshmem", >> + .id_table = ivshmem_pci_ids, >> + .probe = ivshmem_pci_probe, >> + .remove = ivshmem_pci_remove, >> +}; >> + >> +static int __init ivshmem_init_module(void) >> +{ >> + return pci_register_driver(&ivshmem_pci_driver); >> +} >> + >> +static void __exit ivshmem_exit_module(void) >> +{ >> + pci_unregister_driver(&ivshmem_pci_driver); >> +} >> + >> +module_init(ivshmem_init_module); >> +module_exit(ivshmem_exit_module); >> + >> +MODULE_DEVICE_TABLE(pci, ivshmem_pci_ids); >> +MODULE_LICENSE("GPL v2"); >> +MODULE_AUTHOR("Cam Macdonell"); >> -- >> 1.6.6.1 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe kvm" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > >
On 03/25/2010 11:18 AM, Cam Macdonell wrote: > On Thu, Mar 25, 2010 at 3:15 AM, Michael S. Tsirkin<mst@redhat.com> wrote: > >> On Thu, Mar 25, 2010 at 12:09:36AM -0600, Cam Macdonell wrote: >> >>> This patch adds a driver for my shared memory PCI device using the uio_pci >>> interface. The driver has three memory regions. The first memory region is for >>> device registers for sending interrupts. The second BAR is for receiving MSI-X >>> interrupts and the third memory region maps the shared memory. The device only >>> exports the first and third memory regions to userspace. >>> >>> This driver supports MSI-X and regular pin interrupts. Currently, the number of >>> MSI vectors is set to 4 which could be increased, but the driver will work with >>> fewer vectors. If MSI is not available, then regular interrupts will be used. >>> >> Some high level questions, sorry if they have been raised in the past: >> - Can this device use virtio framework? >> This gives us some standards to work off, with feature negotiation, >> ability to detect config changes, support for non-PCI >> platforms, decent documentation that is easy to extend, >> legal id range to use. >> You would thus have your driver in uio/uio_virtio_shmem.c >> > There has been previous discussion of virtio, however while virtio is > good for exporting guest memory, it's not ideal for importing memory > into a guest. > virtio is a DMA-based API which means that it doesn't assume cache coherent shared memory. The PCI transport takes advantage of cache coherent shared memory but it's not strictly required. Memory sharing in virtio would be a layering violation because it forces cache coherent shared memory for all virtio transports. Regards, Anthony Liguori
On Thu, Mar 25, 2010 at 3:46 AM, Avi Kivity <avi@redhat.com> wrote: > On 03/25/2010 08:09 AM, Cam Macdonell wrote: >> >> This patch adds a driver for my shared memory PCI device using the uio_pci >> interface. The driver has three memory regions. The first memory region >> is for >> device registers for sending interrupts. The second BAR is for receiving >> MSI-X >> interrupts and the third memory region maps the shared memory. The device >> only >> exports the first and third memory regions to userspace. >> >> This driver supports MSI-X and regular pin interrupts. Currently, the >> number of >> MSI vectors is set to 4 which could be increased, but the driver will work >> with >> fewer vectors. If MSI is not available, then regular interrupts will be >> used. >> > > There is now a generic PCI 2.3 driver that can handle all PCI devices. It > doesn't support MSI, but if we add MSI support then it can be used without > the need for a specialized driver. Agreed, I'd be happy to use the generic driver if MSI is there. What would MSI support for UIO look like? An array of "struct uio_irq" for the different vectors? Cam > > -- > error compiling committee.c: too many arguments to function > >
On Thu, Mar 25, 2010 at 3:05 AM, Michael S. Tsirkin <mst@redhat.com> wrote: > On Thu, Mar 25, 2010 at 12:09:36AM -0600, Cam Macdonell wrote: >> This patch adds a driver for my shared memory PCI device using the uio_pci >> interface. The driver has three memory regions. The first memory region is for >> device registers for sending interrupts. The second BAR is for receiving MSI-X >> interrupts and the third memory region maps the shared memory. The device only >> exports the first and third memory regions to userspace. >> >> This driver supports MSI-X and regular pin interrupts. Currently, the number of >> MSI vectors is set to 4 which could be increased, but the driver will work with >> fewer vectors. If MSI is not available, then regular interrupts will be used. >> --- >> drivers/uio/Kconfig | 8 ++ >> drivers/uio/Makefile | 1 + >> drivers/uio/uio_ivshmem.c | 235 +++++++++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 244 insertions(+), 0 deletions(-) >> create mode 100644 drivers/uio/uio_ivshmem.c >> >> diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig >> index 1da73ec..b92cded 100644 >> --- a/drivers/uio/Kconfig >> +++ b/drivers/uio/Kconfig >> @@ -74,6 +74,14 @@ config UIO_SERCOS3 >> >> If you compile this as a module, it will be called uio_sercos3. >> >> +config UIO_IVSHMEM >> + tristate "KVM shared memory PCI driver" >> + default n >> + help >> + Userspace I/O interface for the KVM shared memory device. This >> + driver will make available two memory regions, the first is >> + registers and the second is a region for sharing between VMs. >> + >> config UIO_PCI_GENERIC >> tristate "Generic driver for PCI 2.3 and PCI Express cards" >> depends on PCI >> diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile >> index 18fd818..25c1ca5 100644 >> --- a/drivers/uio/Makefile >> +++ b/drivers/uio/Makefile >> @@ -6,3 +6,4 @@ obj-$(CONFIG_UIO_AEC) += uio_aec.o >> obj-$(CONFIG_UIO_SERCOS3) += uio_sercos3.o >> obj-$(CONFIG_UIO_PCI_GENERIC) += uio_pci_generic.o >> obj-$(CONFIG_UIO_NETX) += uio_netx.o >> +obj-$(CONFIG_UIO_IVSHMEM) += uio_ivshmem.o >> diff --git a/drivers/uio/uio_ivshmem.c b/drivers/uio/uio_ivshmem.c >> new file mode 100644 >> index 0000000..607435b >> --- /dev/null >> +++ b/drivers/uio/uio_ivshmem.c >> @@ -0,0 +1,235 @@ >> +/* >> + * UIO IVShmem Driver >> + * >> + * (C) 2009 Cam Macdonell >> + * based on Hilscher CIF card driver (C) 2007 Hans J. Koch <hjk@linutronix.de> >> + * >> + * Licensed under GPL version 2 only. >> + * >> + */ >> + >> +#include <linux/device.h> >> +#include <linux/module.h> >> +#include <linux/pci.h> >> +#include <linux/uio_driver.h> >> + >> +#include <asm/io.h> >> + >> +#define IntrStatus 0x04 >> +#define IntrMask 0x00 >> + >> +struct ivshmem_info { >> + struct uio_info *uio; >> + struct pci_dev *dev; >> + char (*msix_names)[256]; >> + struct msix_entry *msix_entries; >> + int nvectors; >> +}; >> + >> +static irqreturn_t ivshmem_handler(int irq, struct uio_info *dev_info) >> +{ >> + >> + void __iomem *plx_intscr = dev_info->mem[0].internal_addr >> + + IntrStatus; >> + u32 val; >> + >> + val = readl(plx_intscr); >> + if (val == 0) >> + return IRQ_NONE; >> + >> + printk(KERN_INFO "Regular interrupt (val = %d)\n", val); >> + return IRQ_HANDLED; >> +} >> + >> +static irqreturn_t ivshmem_msix_handler(int irq, void *opaque) >> +{ >> + >> + struct uio_info * dev_info = (struct uio_info *) opaque; >> + >> + /* we have to do this explicitly when using MSI-X */ >> + uio_event_notify(dev_info); > > How does userspace know which vector was triggered? Right now, it doesn't. If a user had a particular need they would need to write their own uio driver. I guess this leads to a discussion of MSI support in UIO and how they would work with the userspace. > >> + printk(KERN_INFO "MSI-X interrupt (%d)\n", irq); >> + return IRQ_HANDLED; >> + > > extra empty line > >> +} >> + >> +static int request_msix_vectors(struct ivshmem_info *ivs_info, int nvectors) >> +{ >> + int i, err; >> + const char *name = "ivshmem"; >> + >> + printk(KERN_INFO "devname is %s\n", name); > > These KERN_INFO messages need to be cleaned up, they would be > look confusing in the log. Agreed. I will clean most of these out. > >> + ivs_info->nvectors = nvectors; >> + >> + >> + ivs_info->msix_entries = kmalloc(nvectors * sizeof *ivs_info->msix_entries, >> + GFP_KERNEL); >> + ivs_info->msix_names = kmalloc(nvectors * sizeof *ivs_info->msix_names, >> + GFP_KERNEL); >> + > > need to handle errors > >> + for (i = 0; i < nvectors; ++i) >> + ivs_info->msix_entries[i].entry = i; >> + >> + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, >> + ivs_info->nvectors); >> + if (err > 0) { >> + ivs_info->nvectors = err; /* msi-x positive error code >> + returns the number available*/ >> + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, >> + ivs_info->nvectors); >> + if (err > 0) { >> + printk(KERN_INFO "no MSI (%d). Back to INTx.\n", err); >> + return -ENOSPC; >> + } >> + } > > we can also get err < 0. > >> + >> + printk(KERN_INFO "err is %d\n", err); >> + if (err) return err; > > coding style rule violation > >> + >> + for (i = 0; i < ivs_info->nvectors; i++) { >> + >> + snprintf(ivs_info->msix_names[i], sizeof *ivs_info->msix_names, >> + "%s-config", name); >> + >> + ivs_info->msix_entries[i].entry = i; >> + err = request_irq(ivs_info->msix_entries[i].vector, >> + ivshmem_msix_handler, 0, >> + ivs_info->msix_names[i], ivs_info->uio); >> + >> + if (err) { >> + return -ENOSPC; > > coding style rule violation > no undo on error handling > why override error code with -ENOSPC? Ah, I think I've confused linux and qemu coding styles perhaps. I'll fix these and others above. > >> + } >> + } >> + >> + return 0; >> +} >> + >> +static int __devinit ivshmem_pci_probe(struct pci_dev *dev, >> + const struct pci_device_id *id) >> +{ >> + struct uio_info *info; >> + struct ivshmem_info * ivshmem_info; >> + int nvectors = 4; >> + >> + info = kzalloc(sizeof(struct uio_info), GFP_KERNEL); >> + if (!info) >> + return -ENOMEM; >> + >> + ivshmem_info = kzalloc(sizeof(struct ivshmem_info), GFP_KERNEL); >> + if (!ivshmem_info) { >> + kfree(info); >> + return -ENOMEM; >> + } >> + >> + if (pci_enable_device(dev)) >> + goto out_free; >> + >> + if (pci_request_regions(dev, "ivshmem")) >> + goto out_disable; >> + >> + info->mem[0].addr = pci_resource_start(dev, 0); >> + if (!info->mem[0].addr) >> + goto out_release; >> + >> + info->mem[0].size = pci_resource_len(dev, 0); >> + info->mem[0].internal_addr = pci_ioremap_bar(dev, 0); >> + if (!info->mem[0].internal_addr) { >> + printk(KERN_INFO "got a null"); >> + goto out_release; >> + } >> + >> + info->mem[0].memtype = UIO_MEM_PHYS; >> + >> + info->mem[1].addr = pci_resource_start(dev, 2); >> + if (!info->mem[1].addr) >> + goto out_unmap; >> + info->mem[1].internal_addr = pci_ioremap_bar(dev, 2); >> + if (!info->mem[1].internal_addr) >> + goto out_unmap; >> + >> + info->mem[1].size = pci_resource_len(dev, 2); >> + info->mem[1].memtype = UIO_MEM_PHYS; >> + >> + ivshmem_info->uio = info; >> + ivshmem_info->dev = dev; >> + >> + if (request_msix_vectors(ivshmem_info, nvectors) != 0) { >> + printk(KERN_INFO "regular IRQs\n"); >> + info->irq = dev->irq; >> + info->irq_flags = IRQF_SHARED; >> + info->handler = ivshmem_handler; >> + writel(0xffffffff, info->mem[0].internal_addr + IntrMask); >> + } else { >> + printk(KERN_INFO "MSI-X enabled\n"); >> + info->irq = -1; >> + } >> + >> + info->name = "ivshmem"; >> + info->version = "0.0.1"; >> + >> + if (uio_register_device(&dev->dev, info)) >> + goto out_unmap2; >> + >> + pci_set_drvdata(dev, info); >> + >> + >> + return 0; >> +out_unmap2: >> + iounmap(info->mem[2].internal_addr); >> +out_unmap: >> + iounmap(info->mem[0].internal_addr); >> +out_release: >> + pci_release_regions(dev); >> +out_disable: >> + pci_disable_device(dev); >> +out_free: >> + kfree (info); >> + return -ENODEV; >> +} >> + >> +static void ivshmem_pci_remove(struct pci_dev *dev) >> +{ >> + struct uio_info *info = pci_get_drvdata(dev); >> + >> + uio_unregister_device(info); >> + pci_release_regions(dev); >> + pci_disable_device(dev); >> + pci_set_drvdata(dev, NULL); >> + iounmap(info->mem[0].internal_addr); >> + >> + kfree (info); >> +} >> + >> +static struct pci_device_id ivshmem_pci_ids[] __devinitdata = { >> + { >> + .vendor = 0x1af4, >> + .device = 0x1110, > > vendor ids must be registered with PCI SIG. > this one does not seem to be registered. > >> + .subvendor = PCI_ANY_ID, >> + .subdevice = PCI_ANY_ID, >> + }, >> + { 0, } >> +}; >> + >> +static struct pci_driver ivshmem_pci_driver = { >> + .name = "uio_ivshmem", >> + .id_table = ivshmem_pci_ids, >> + .probe = ivshmem_pci_probe, >> + .remove = ivshmem_pci_remove, >> +}; >> + >> +static int __init ivshmem_init_module(void) >> +{ >> + return pci_register_driver(&ivshmem_pci_driver); >> +} >> + >> +static void __exit ivshmem_exit_module(void) >> +{ >> + pci_unregister_driver(&ivshmem_pci_driver); >> +} >> + >> +module_init(ivshmem_init_module); >> +module_exit(ivshmem_exit_module); >> + >> +MODULE_DEVICE_TABLE(pci, ivshmem_pci_ids); >> +MODULE_LICENSE("GPL v2"); >> +MODULE_AUTHOR("Cam Macdonell"); >> -- >> 1.6.6.1 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe kvm" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > >
On 03/25/2010 06:23 PM, Anthony Liguori wrote: >> There has been previous discussion of virtio, however while virtio is >> good for exporting guest memory, it's not ideal for importing memory >> into a guest. > > virtio is a DMA-based API which means that it doesn't assume cache > coherent shared memory. The PCI transport takes advantage of cache > coherent shared memory but it's not strictly required. Aren't we violating this by not using dma_alloc_coherent() for the queues?
On Thu, Mar 25, 2010 at 11:23:05AM -0500, Anthony Liguori wrote: > On 03/25/2010 11:18 AM, Cam Macdonell wrote: >> On Thu, Mar 25, 2010 at 3:15 AM, Michael S. Tsirkin<mst@redhat.com> wrote: >> >>> On Thu, Mar 25, 2010 at 12:09:36AM -0600, Cam Macdonell wrote: >>> >>>> This patch adds a driver for my shared memory PCI device using the uio_pci >>>> interface. The driver has three memory regions. The first memory region is for >>>> device registers for sending interrupts. The second BAR is for receiving MSI-X >>>> interrupts and the third memory region maps the shared memory. The device only >>>> exports the first and third memory regions to userspace. >>>> >>>> This driver supports MSI-X and regular pin interrupts. Currently, the number of >>>> MSI vectors is set to 4 which could be increased, but the driver will work with >>>> fewer vectors. If MSI is not available, then regular interrupts will be used. >>>> >>> Some high level questions, sorry if they have been raised in the past: >>> - Can this device use virtio framework? >>> This gives us some standards to work off, with feature negotiation, >>> ability to detect config changes, support for non-PCI >>> platforms, decent documentation that is easy to extend, >>> legal id range to use. >>> You would thus have your driver in uio/uio_virtio_shmem.c >>> >> There has been previous discussion of virtio, however while virtio is >> good for exporting guest memory, it's not ideal for importing memory >> into a guest. >> > > virtio is a DMA-based API which means that it doesn't assume cache > coherent shared memory. The PCI transport takes advantage of cache > coherent shared memory but it's not strictly required. > > Memory sharing in virtio would be a layering violation because it forces > cache coherent shared memory for all virtio transports. > > Regards, > > Anthony Liguori I am not sure I understand the argument. We can still reuse feature negotiation, notifications etc from virtio. If some transports can not support cache coherent shared memory, the device won't work there. But it won't work there anyway, even without using virtio. We are not forcing all devices to assume cache coherency. In other words, let's not fall into midlayer mistake, let's treat virtio as a library.
On Thu, Mar 25, 2010 at 10:30:42AM -0600, Cam Macdonell wrote: > On Thu, Mar 25, 2010 at 3:05 AM, Michael S. Tsirkin <mst@redhat.com> wrote: > > On Thu, Mar 25, 2010 at 12:09:36AM -0600, Cam Macdonell wrote: > >> This patch adds a driver for my shared memory PCI device using the uio_pci > >> interface. The driver has three memory regions. The first memory region is for > >> device registers for sending interrupts. The second BAR is for receiving MSI-X > >> interrupts and the third memory region maps the shared memory. The device only > >> exports the first and third memory regions to userspace. > >> > >> This driver supports MSI-X and regular pin interrupts. Currently, the number of > >> MSI vectors is set to 4 which could be increased, but the driver will work with > >> fewer vectors. If MSI is not available, then regular interrupts will be used. > >> --- > >> drivers/uio/Kconfig | 8 ++ > >> drivers/uio/Makefile | 1 + > >> drivers/uio/uio_ivshmem.c | 235 +++++++++++++++++++++++++++++++++++++++++++++ > >> 3 files changed, 244 insertions(+), 0 deletions(-) > >> create mode 100644 drivers/uio/uio_ivshmem.c > >> > >> diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig > >> index 1da73ec..b92cded 100644 > >> --- a/drivers/uio/Kconfig > >> +++ b/drivers/uio/Kconfig > >> @@ -74,6 +74,14 @@ config UIO_SERCOS3 > >> > >> If you compile this as a module, it will be called uio_sercos3. > >> > >> +config UIO_IVSHMEM > >> + tristate "KVM shared memory PCI driver" > >> + default n > >> + help > >> + Userspace I/O interface for the KVM shared memory device. This > >> + driver will make available two memory regions, the first is > >> + registers and the second is a region for sharing between VMs. > >> + > >> config UIO_PCI_GENERIC > >> tristate "Generic driver for PCI 2.3 and PCI Express cards" > >> depends on PCI > >> diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile > >> index 18fd818..25c1ca5 100644 > >> --- a/drivers/uio/Makefile > >> +++ b/drivers/uio/Makefile > >> @@ -6,3 +6,4 @@ obj-$(CONFIG_UIO_AEC) += uio_aec.o > >> obj-$(CONFIG_UIO_SERCOS3) += uio_sercos3.o > >> obj-$(CONFIG_UIO_PCI_GENERIC) += uio_pci_generic.o > >> obj-$(CONFIG_UIO_NETX) += uio_netx.o > >> +obj-$(CONFIG_UIO_IVSHMEM) += uio_ivshmem.o > >> diff --git a/drivers/uio/uio_ivshmem.c b/drivers/uio/uio_ivshmem.c > >> new file mode 100644 > >> index 0000000..607435b > >> --- /dev/null > >> +++ b/drivers/uio/uio_ivshmem.c > >> @@ -0,0 +1,235 @@ > >> +/* > >> + * UIO IVShmem Driver > >> + * > >> + * (C) 2009 Cam Macdonell > >> + * based on Hilscher CIF card driver (C) 2007 Hans J. Koch <hjk@linutronix.de> > >> + * > >> + * Licensed under GPL version 2 only. > >> + * > >> + */ > >> + > >> +#include <linux/device.h> > >> +#include <linux/module.h> > >> +#include <linux/pci.h> > >> +#include <linux/uio_driver.h> > >> + > >> +#include <asm/io.h> > >> + > >> +#define IntrStatus 0x04 > >> +#define IntrMask 0x00 > >> + > >> +struct ivshmem_info { > >> + struct uio_info *uio; > >> + struct pci_dev *dev; > >> + char (*msix_names)[256]; > >> + struct msix_entry *msix_entries; > >> + int nvectors; > >> +}; > >> + > >> +static irqreturn_t ivshmem_handler(int irq, struct uio_info *dev_info) > >> +{ > >> + > >> + void __iomem *plx_intscr = dev_info->mem[0].internal_addr > >> + + IntrStatus; > >> + u32 val; > >> + > >> + val = readl(plx_intscr); > >> + if (val == 0) > >> + return IRQ_NONE; > >> + > >> + printk(KERN_INFO "Regular interrupt (val = %d)\n", val); > >> + return IRQ_HANDLED; > >> +} > >> + > >> +static irqreturn_t ivshmem_msix_handler(int irq, void *opaque) > >> +{ > >> + > >> + struct uio_info * dev_info = (struct uio_info *) opaque; > >> + > >> + /* we have to do this explicitly when using MSI-X */ > >> + uio_event_notify(dev_info); > > > > How does userspace know which vector was triggered? > > Right now, it doesn't. If a user had a particular need they would > need to write their own uio driver. I guess this leads to a > discussion of MSI support in UIO and how they would work with the > userspace. So why request more than one vector then? > > > >> + printk(KERN_INFO "MSI-X interrupt (%d)\n", irq); > >> + return IRQ_HANDLED; > >> + > > > > extra empty line > > > >> +} > >> + > >> +static int request_msix_vectors(struct ivshmem_info *ivs_info, int nvectors) > >> +{ > >> + int i, err; > >> + const char *name = "ivshmem"; > >> + > >> + printk(KERN_INFO "devname is %s\n", name); > > > > These KERN_INFO messages need to be cleaned up, they would be > > look confusing in the log. > > Agreed. I will clean most of these out. > > > > >> + ivs_info->nvectors = nvectors; > >> + > >> + > >> + ivs_info->msix_entries = kmalloc(nvectors * sizeof *ivs_info->msix_entries, > >> + GFP_KERNEL); > >> + ivs_info->msix_names = kmalloc(nvectors * sizeof *ivs_info->msix_names, > >> + GFP_KERNEL); > >> + > > > > need to handle errors > > > >> + for (i = 0; i < nvectors; ++i) > >> + ivs_info->msix_entries[i].entry = i; > >> + > >> + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, > >> + ivs_info->nvectors); > >> + if (err > 0) { > >> + ivs_info->nvectors = err; /* msi-x positive error code > >> + returns the number available*/ > >> + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, > >> + ivs_info->nvectors); > >> + if (err > 0) { > >> + printk(KERN_INFO "no MSI (%d). Back to INTx.\n", err); > >> + return -ENOSPC; > >> + } > >> + } > > > > we can also get err < 0. > > > >> + > >> + printk(KERN_INFO "err is %d\n", err); > >> + if (err) return err; > > > > coding style rule violation > > > >> + > >> + for (i = 0; i < ivs_info->nvectors; i++) { > >> + > >> + snprintf(ivs_info->msix_names[i], sizeof *ivs_info->msix_names, > >> + "%s-config", name); > >> + > >> + ivs_info->msix_entries[i].entry = i; > >> + err = request_irq(ivs_info->msix_entries[i].vector, > >> + ivshmem_msix_handler, 0, > >> + ivs_info->msix_names[i], ivs_info->uio); > >> + > >> + if (err) { > >> + return -ENOSPC; > > > > coding style rule violation > > no undo on error handling > > why override error code with -ENOSPC? > > Ah, I think I've confused linux and qemu coding styles perhaps. I'll > fix these and others above. > > > > >> + } > >> + } > >> + > >> + return 0; > >> +} > >> + > >> +static int __devinit ivshmem_pci_probe(struct pci_dev *dev, > >> + const struct pci_device_id *id) > >> +{ > >> + struct uio_info *info; > >> + struct ivshmem_info * ivshmem_info; > >> + int nvectors = 4; > >> + > >> + info = kzalloc(sizeof(struct uio_info), GFP_KERNEL); > >> + if (!info) > >> + return -ENOMEM; > >> + > >> + ivshmem_info = kzalloc(sizeof(struct ivshmem_info), GFP_KERNEL); > >> + if (!ivshmem_info) { > >> + kfree(info); > >> + return -ENOMEM; > >> + } > >> + > >> + if (pci_enable_device(dev)) > >> + goto out_free; > >> + > >> + if (pci_request_regions(dev, "ivshmem")) > >> + goto out_disable; > >> + > >> + info->mem[0].addr = pci_resource_start(dev, 0); > >> + if (!info->mem[0].addr) > >> + goto out_release; > >> + > >> + info->mem[0].size = pci_resource_len(dev, 0); > >> + info->mem[0].internal_addr = pci_ioremap_bar(dev, 0); > >> + if (!info->mem[0].internal_addr) { > >> + printk(KERN_INFO "got a null"); > >> + goto out_release; > >> + } > >> + > >> + info->mem[0].memtype = UIO_MEM_PHYS; > >> + > >> + info->mem[1].addr = pci_resource_start(dev, 2); > >> + if (!info->mem[1].addr) > >> + goto out_unmap; > >> + info->mem[1].internal_addr = pci_ioremap_bar(dev, 2); > >> + if (!info->mem[1].internal_addr) > >> + goto out_unmap; > >> + > >> + info->mem[1].size = pci_resource_len(dev, 2); > >> + info->mem[1].memtype = UIO_MEM_PHYS; > >> + > >> + ivshmem_info->uio = info; > >> + ivshmem_info->dev = dev; > >> + > >> + if (request_msix_vectors(ivshmem_info, nvectors) != 0) { > >> + printk(KERN_INFO "regular IRQs\n"); > >> + info->irq = dev->irq; > >> + info->irq_flags = IRQF_SHARED; > >> + info->handler = ivshmem_handler; > >> + writel(0xffffffff, info->mem[0].internal_addr + IntrMask); > >> + } else { > >> + printk(KERN_INFO "MSI-X enabled\n"); > >> + info->irq = -1; > >> + } > >> + > >> + info->name = "ivshmem"; > >> + info->version = "0.0.1"; > >> + > >> + if (uio_register_device(&dev->dev, info)) > >> + goto out_unmap2; > >> + > >> + pci_set_drvdata(dev, info); > >> + > >> + > >> + return 0; > >> +out_unmap2: > >> + iounmap(info->mem[2].internal_addr); > >> +out_unmap: > >> + iounmap(info->mem[0].internal_addr); > >> +out_release: > >> + pci_release_regions(dev); > >> +out_disable: > >> + pci_disable_device(dev); > >> +out_free: > >> + kfree (info); > >> + return -ENODEV; > >> +} > >> + > >> +static void ivshmem_pci_remove(struct pci_dev *dev) > >> +{ > >> + struct uio_info *info = pci_get_drvdata(dev); > >> + > >> + uio_unregister_device(info); > >> + pci_release_regions(dev); > >> + pci_disable_device(dev); > >> + pci_set_drvdata(dev, NULL); > >> + iounmap(info->mem[0].internal_addr); > >> + > >> + kfree (info); > >> +} > >> + > >> +static struct pci_device_id ivshmem_pci_ids[] __devinitdata = { > >> + { > >> + .vendor = 0x1af4, > >> + .device = 0x1110, > > > > vendor ids must be registered with PCI SIG. > > this one does not seem to be registered. > > > >> + .subvendor = PCI_ANY_ID, > >> + .subdevice = PCI_ANY_ID, > >> + }, > >> + { 0, } > >> +}; > >> + > >> +static struct pci_driver ivshmem_pci_driver = { > >> + .name = "uio_ivshmem", > >> + .id_table = ivshmem_pci_ids, > >> + .probe = ivshmem_pci_probe, > >> + .remove = ivshmem_pci_remove, > >> +}; > >> + > >> +static int __init ivshmem_init_module(void) > >> +{ > >> + return pci_register_driver(&ivshmem_pci_driver); > >> +} > >> + > >> +static void __exit ivshmem_exit_module(void) > >> +{ > >> + pci_unregister_driver(&ivshmem_pci_driver); > >> +} > >> + > >> +module_init(ivshmem_init_module); > >> +module_exit(ivshmem_exit_module); > >> + > >> +MODULE_DEVICE_TABLE(pci, ivshmem_pci_ids); > >> +MODULE_LICENSE("GPL v2"); > >> +MODULE_AUTHOR("Cam Macdonell"); > >> -- > >> 1.6.6.1 > >> > >> -- > >> To unsubscribe from this list: send the line "unsubscribe kvm" in > >> the body of a message to majordomo@vger.kernel.org > >> More majordomo info at http://vger.kernel.org/majordomo-info.html > > > >
On 03/25/2010 06:24 PM, Cam Macdonell wrote: > >> There is now a generic PCI 2.3 driver that can handle all PCI devices. It >> doesn't support MSI, but if we add MSI support then it can be used without >> the need for a specialized driver. >> > Agreed, I'd be happy to use the generic driver if MSI is there. What > would MSI support for UIO look like? An array of "struct uio_irq" for > the different vectors? > I'm not familiar with the uio internals, but for the interface, an ioctl() on the fd to assign an eventfd to an MSI vector. Similar to ioeventfd, but instead of mapping a doorbell to an eventfd, it maps a real MSI to an eventfd. That would be very useful for device assignment: we can pick up a uio device, map its vectors, and give them to a guest.
On 03/25/2010 11:05 AM, Michael S. Tsirkin wrote: > >> +static struct pci_device_id ivshmem_pci_ids[] __devinitdata = { >> + { >> + .vendor = 0x1af4, >> + .device = 0x1110, >> > vendor ids must be registered with PCI SIG. > this one does not seem to be registered. > That's the Qumranet vendor ID.
On Thu, Mar 25, 2010 at 06:36:02PM +0200, Avi Kivity wrote: > On 03/25/2010 11:05 AM, Michael S. Tsirkin wrote: >> >>> +static struct pci_device_id ivshmem_pci_ids[] __devinitdata = { >>> + { >>> + .vendor = 0x1af4, >>> + .device = 0x1110, >>> >> vendor ids must be registered with PCI SIG. >> this one does not seem to be registered. >> > > That's the Qumranet vendor ID. Isn't virtio_pci matching that id already? > -- > error compiling committee.c: too many arguments to function
On Thu, Mar 25, 2010 at 06:32:15PM +0200, Avi Kivity wrote: > On 03/25/2010 06:23 PM, Anthony Liguori wrote: >>> There has been previous discussion of virtio, however while virtio is >>> good for exporting guest memory, it's not ideal for importing memory >>> into a guest. >> >> virtio is a DMA-based API which means that it doesn't assume cache >> coherent shared memory. The PCI transport takes advantage of cache >> coherent shared memory but it's not strictly required. > > Aren't we violating this by not using dma_alloc_coherent() for the queues? I don't see what changing this would buys us though, unless a non-cache coherent architecture implements kvm. TCG runs everything on a single processor so there are no cache coherency issues. > -- > error compiling committee.c: too many arguments to function
On Thu, Mar 25, 2010 at 10:24:20AM -0600, Cam Macdonell wrote: > On Thu, Mar 25, 2010 at 3:46 AM, Avi Kivity <avi@redhat.com> wrote: > > On 03/25/2010 08:09 AM, Cam Macdonell wrote: > >> > >> This patch adds a driver for my shared memory PCI device using the uio_pci > >> interface. The driver has three memory regions. The first memory region > >> is for > >> device registers for sending interrupts. The second BAR is for receiving > >> MSI-X > >> interrupts and the third memory region maps the shared memory. The device > >> only > >> exports the first and third memory regions to userspace. > >> > >> This driver supports MSI-X and regular pin interrupts. Currently, the > >> number of > >> MSI vectors is set to 4 which could be increased, but the driver will work > >> with > >> fewer vectors. If MSI is not available, then regular interrupts will be > >> used. > >> > > > > There is now a generic PCI 2.3 driver that can handle all PCI devices. It > > doesn't support MSI, but if we add MSI support then it can be used without > > the need for a specialized driver. > > Agreed, I'd be happy to use the generic driver if MSI is there. What > would MSI support for UIO look like? An array of "struct uio_irq" for > the different vectors? > > Cam My idea was to supply a way to bind eventfd to a vector. > > > > -- > > error compiling committee.c: too many arguments to function > > > >
On 03/25/2010 06:40 PM, Michael S. Tsirkin wrote: > On Thu, Mar 25, 2010 at 06:32:15PM +0200, Avi Kivity wrote: > >> On 03/25/2010 06:23 PM, Anthony Liguori wrote: >> >>>> There has been previous discussion of virtio, however while virtio is >>>> good for exporting guest memory, it's not ideal for importing memory >>>> into a guest. >>>> >>> virtio is a DMA-based API which means that it doesn't assume cache >>> coherent shared memory. The PCI transport takes advantage of cache >>> coherent shared memory but it's not strictly required. >>> >> Aren't we violating this by not using dma_alloc_coherent() for the queues? >> > I don't see what changing this would buys us though, unless > a non-cache coherent architecture implements kvm. > We're preventing people from implementing virtio devices in hardware, not that anyone's doing that.
On 03/25/2010 06:37 PM, Michael S. Tsirkin wrote: > On Thu, Mar 25, 2010 at 06:36:02PM +0200, Avi Kivity wrote: > >> On 03/25/2010 11:05 AM, Michael S. Tsirkin wrote: >> >>> >>>> +static struct pci_device_id ivshmem_pci_ids[] __devinitdata = { >>>> + { >>>> + .vendor = 0x1af4, >>>> + .device = 0x1110, >>>> >>>> >>> vendor ids must be registered with PCI SIG. >>> this one does not seem to be registered. >>> >>> >> That's the Qumranet vendor ID. >> > Isn't virtio_pci matching that id already? > > virtio matches devices 1000-1100 or something.
On Thu, Mar 25, 2010 at 10:34 AM, Michael S. Tsirkin <mst@redhat.com> wrote: > On Thu, Mar 25, 2010 at 10:30:42AM -0600, Cam Macdonell wrote: >> On Thu, Mar 25, 2010 at 3:05 AM, Michael S. Tsirkin <mst@redhat.com> wrote: >> > On Thu, Mar 25, 2010 at 12:09:36AM -0600, Cam Macdonell wrote: >> >> This patch adds a driver for my shared memory PCI device using the uio_pci >> >> interface. The driver has three memory regions. The first memory region is for >> >> device registers for sending interrupts. The second BAR is for receiving MSI-X >> >> interrupts and the third memory region maps the shared memory. The device only >> >> exports the first and third memory regions to userspace. >> >> >> >> This driver supports MSI-X and regular pin interrupts. Currently, the number of >> >> MSI vectors is set to 4 which could be increased, but the driver will work with >> >> fewer vectors. If MSI is not available, then regular interrupts will be used. >> >> --- >> >> drivers/uio/Kconfig | 8 ++ >> >> drivers/uio/Makefile | 1 + >> >> drivers/uio/uio_ivshmem.c | 235 +++++++++++++++++++++++++++++++++++++++++++++ >> >> 3 files changed, 244 insertions(+), 0 deletions(-) >> >> create mode 100644 drivers/uio/uio_ivshmem.c >> >> >> >> diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig >> >> index 1da73ec..b92cded 100644 >> >> --- a/drivers/uio/Kconfig >> >> +++ b/drivers/uio/Kconfig >> >> @@ -74,6 +74,14 @@ config UIO_SERCOS3 >> >> >> >> If you compile this as a module, it will be called uio_sercos3. >> >> >> >> +config UIO_IVSHMEM >> >> + tristate "KVM shared memory PCI driver" >> >> + default n >> >> + help >> >> + Userspace I/O interface for the KVM shared memory device. This >> >> + driver will make available two memory regions, the first is >> >> + registers and the second is a region for sharing between VMs. >> >> + >> >> config UIO_PCI_GENERIC >> >> tristate "Generic driver for PCI 2.3 and PCI Express cards" >> >> depends on PCI >> >> diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile >> >> index 18fd818..25c1ca5 100644 >> >> --- a/drivers/uio/Makefile >> >> +++ b/drivers/uio/Makefile >> >> @@ -6,3 +6,4 @@ obj-$(CONFIG_UIO_AEC) += uio_aec.o >> >> obj-$(CONFIG_UIO_SERCOS3) += uio_sercos3.o >> >> obj-$(CONFIG_UIO_PCI_GENERIC) += uio_pci_generic.o >> >> obj-$(CONFIG_UIO_NETX) += uio_netx.o >> >> +obj-$(CONFIG_UIO_IVSHMEM) += uio_ivshmem.o >> >> diff --git a/drivers/uio/uio_ivshmem.c b/drivers/uio/uio_ivshmem.c >> >> new file mode 100644 >> >> index 0000000..607435b >> >> --- /dev/null >> >> +++ b/drivers/uio/uio_ivshmem.c >> >> @@ -0,0 +1,235 @@ >> >> +/* >> >> + * UIO IVShmem Driver >> >> + * >> >> + * (C) 2009 Cam Macdonell >> >> + * based on Hilscher CIF card driver (C) 2007 Hans J. Koch <hjk@linutronix.de> >> >> + * >> >> + * Licensed under GPL version 2 only. >> >> + * >> >> + */ >> >> + >> >> +#include <linux/device.h> >> >> +#include <linux/module.h> >> >> +#include <linux/pci.h> >> >> +#include <linux/uio_driver.h> >> >> + >> >> +#include <asm/io.h> >> >> + >> >> +#define IntrStatus 0x04 >> >> +#define IntrMask 0x00 >> >> + >> >> +struct ivshmem_info { >> >> + struct uio_info *uio; >> >> + struct pci_dev *dev; >> >> + char (*msix_names)[256]; >> >> + struct msix_entry *msix_entries; >> >> + int nvectors; >> >> +}; >> >> + >> >> +static irqreturn_t ivshmem_handler(int irq, struct uio_info *dev_info) >> >> +{ >> >> + >> >> + void __iomem *plx_intscr = dev_info->mem[0].internal_addr >> >> + + IntrStatus; >> >> + u32 val; >> >> + >> >> + val = readl(plx_intscr); >> >> + if (val == 0) >> >> + return IRQ_NONE; >> >> + >> >> + printk(KERN_INFO "Regular interrupt (val = %d)\n", val); >> >> + return IRQ_HANDLED; >> >> +} >> >> + >> >> +static irqreturn_t ivshmem_msix_handler(int irq, void *opaque) >> >> +{ >> >> + >> >> + struct uio_info * dev_info = (struct uio_info *) opaque; >> >> + >> >> + /* we have to do this explicitly when using MSI-X */ >> >> + uio_event_notify(dev_info); >> > >> > How does userspace know which vector was triggered? >> >> Right now, it doesn't. If a user had a particular need they would >> need to write their own uio driver. I guess this leads to a >> discussion of MSI support in UIO and how they would work with the >> userspace. > > So why request more than one vector then? No strong reason, to some extent an artifact of my playing with uio and MSI together. But, since interrupts on vector 0 are raised on guest joins/exits, a user could modify the driver to ignore those if they wanted. Also, this way the device doesn't need to know if the guest is using 1 or 2 vectors. > >> > >> >> + printk(KERN_INFO "MSI-X interrupt (%d)\n", irq); >> >> + return IRQ_HANDLED; >> >> + >> > >> > extra empty line >> > >> >> +} >> >> + >> >> +static int request_msix_vectors(struct ivshmem_info *ivs_info, int nvectors) >> >> +{ >> >> + int i, err; >> >> + const char *name = "ivshmem"; >> >> + >> >> + printk(KERN_INFO "devname is %s\n", name); >> > >> > These KERN_INFO messages need to be cleaned up, they would be >> > look confusing in the log. >> >> Agreed. I will clean most of these out. >> >> > >> >> + ivs_info->nvectors = nvectors; >> >> + >> >> + >> >> + ivs_info->msix_entries = kmalloc(nvectors * sizeof *ivs_info->msix_entries, >> >> + GFP_KERNEL); >> >> + ivs_info->msix_names = kmalloc(nvectors * sizeof *ivs_info->msix_names, >> >> + GFP_KERNEL); >> >> + >> > >> > need to handle errors >> > >> >> + for (i = 0; i < nvectors; ++i) >> >> + ivs_info->msix_entries[i].entry = i; >> >> + >> >> + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, >> >> + ivs_info->nvectors); >> >> + if (err > 0) { >> >> + ivs_info->nvectors = err; /* msi-x positive error code >> >> + returns the number available*/ >> >> + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, >> >> + ivs_info->nvectors); >> >> + if (err > 0) { >> >> + printk(KERN_INFO "no MSI (%d). Back to INTx.\n", err); >> >> + return -ENOSPC; >> >> + } >> >> + } >> > >> > we can also get err < 0. >> > >> >> + >> >> + printk(KERN_INFO "err is %d\n", err); >> >> + if (err) return err; >> > >> > coding style rule violation >> > >> >> + >> >> + for (i = 0; i < ivs_info->nvectors; i++) { >> >> + >> >> + snprintf(ivs_info->msix_names[i], sizeof *ivs_info->msix_names, >> >> + "%s-config", name); >> >> + >> >> + ivs_info->msix_entries[i].entry = i; >> >> + err = request_irq(ivs_info->msix_entries[i].vector, >> >> + ivshmem_msix_handler, 0, >> >> + ivs_info->msix_names[i], ivs_info->uio); >> >> + >> >> + if (err) { >> >> + return -ENOSPC; >> > >> > coding style rule violation >> > no undo on error handling >> > why override error code with -ENOSPC? >> >> Ah, I think I've confused linux and qemu coding styles perhaps. I'll >> fix these and others above. >> >> > >> >> + } >> >> + } >> >> + >> >> + return 0; >> >> +} >> >> + >> >> +static int __devinit ivshmem_pci_probe(struct pci_dev *dev, >> >> + const struct pci_device_id *id) >> >> +{ >> >> + struct uio_info *info; >> >> + struct ivshmem_info * ivshmem_info; >> >> + int nvectors = 4; >> >> + >> >> + info = kzalloc(sizeof(struct uio_info), GFP_KERNEL); >> >> + if (!info) >> >> + return -ENOMEM; >> >> + >> >> + ivshmem_info = kzalloc(sizeof(struct ivshmem_info), GFP_KERNEL); >> >> + if (!ivshmem_info) { >> >> + kfree(info); >> >> + return -ENOMEM; >> >> + } >> >> + >> >> + if (pci_enable_device(dev)) >> >> + goto out_free; >> >> + >> >> + if (pci_request_regions(dev, "ivshmem")) >> >> + goto out_disable; >> >> + >> >> + info->mem[0].addr = pci_resource_start(dev, 0); >> >> + if (!info->mem[0].addr) >> >> + goto out_release; >> >> + >> >> + info->mem[0].size = pci_resource_len(dev, 0); >> >> + info->mem[0].internal_addr = pci_ioremap_bar(dev, 0); >> >> + if (!info->mem[0].internal_addr) { >> >> + printk(KERN_INFO "got a null"); >> >> + goto out_release; >> >> + } >> >> + >> >> + info->mem[0].memtype = UIO_MEM_PHYS; >> >> + >> >> + info->mem[1].addr = pci_resource_start(dev, 2); >> >> + if (!info->mem[1].addr) >> >> + goto out_unmap; >> >> + info->mem[1].internal_addr = pci_ioremap_bar(dev, 2); >> >> + if (!info->mem[1].internal_addr) >> >> + goto out_unmap; >> >> + >> >> + info->mem[1].size = pci_resource_len(dev, 2); >> >> + info->mem[1].memtype = UIO_MEM_PHYS; >> >> + >> >> + ivshmem_info->uio = info; >> >> + ivshmem_info->dev = dev; >> >> + >> >> + if (request_msix_vectors(ivshmem_info, nvectors) != 0) { >> >> + printk(KERN_INFO "regular IRQs\n"); >> >> + info->irq = dev->irq; >> >> + info->irq_flags = IRQF_SHARED; >> >> + info->handler = ivshmem_handler; >> >> + writel(0xffffffff, info->mem[0].internal_addr + IntrMask); >> >> + } else { >> >> + printk(KERN_INFO "MSI-X enabled\n"); >> >> + info->irq = -1; >> >> + } >> >> + >> >> + info->name = "ivshmem"; >> >> + info->version = "0.0.1"; >> >> + >> >> + if (uio_register_device(&dev->dev, info)) >> >> + goto out_unmap2; >> >> + >> >> + pci_set_drvdata(dev, info); >> >> + >> >> + >> >> + return 0; >> >> +out_unmap2: >> >> + iounmap(info->mem[2].internal_addr); >> >> +out_unmap: >> >> + iounmap(info->mem[0].internal_addr); >> >> +out_release: >> >> + pci_release_regions(dev); >> >> +out_disable: >> >> + pci_disable_device(dev); >> >> +out_free: >> >> + kfree (info); >> >> + return -ENODEV; >> >> +} >> >> + >> >> +static void ivshmem_pci_remove(struct pci_dev *dev) >> >> +{ >> >> + struct uio_info *info = pci_get_drvdata(dev); >> >> + >> >> + uio_unregister_device(info); >> >> + pci_release_regions(dev); >> >> + pci_disable_device(dev); >> >> + pci_set_drvdata(dev, NULL); >> >> + iounmap(info->mem[0].internal_addr); >> >> + >> >> + kfree (info); >> >> +} >> >> + >> >> +static struct pci_device_id ivshmem_pci_ids[] __devinitdata = { >> >> + { >> >> + .vendor = 0x1af4, >> >> + .device = 0x1110, >> > >> > vendor ids must be registered with PCI SIG. >> > this one does not seem to be registered. >> > >> >> + .subvendor = PCI_ANY_ID, >> >> + .subdevice = PCI_ANY_ID, >> >> + }, >> >> + { 0, } >> >> +}; >> >> + >> >> +static struct pci_driver ivshmem_pci_driver = { >> >> + .name = "uio_ivshmem", >> >> + .id_table = ivshmem_pci_ids, >> >> + .probe = ivshmem_pci_probe, >> >> + .remove = ivshmem_pci_remove, >> >> +}; >> >> + >> >> +static int __init ivshmem_init_module(void) >> >> +{ >> >> + return pci_register_driver(&ivshmem_pci_driver); >> >> +} >> >> + >> >> +static void __exit ivshmem_exit_module(void) >> >> +{ >> >> + pci_unregister_driver(&ivshmem_pci_driver); >> >> +} >> >> + >> >> +module_init(ivshmem_init_module); >> >> +module_exit(ivshmem_exit_module); >> >> + >> >> +MODULE_DEVICE_TABLE(pci, ivshmem_pci_ids); >> >> +MODULE_LICENSE("GPL v2"); >> >> +MODULE_AUTHOR("Cam Macdonell"); >> >> -- >> >> 1.6.6.1 >> >> >> >> -- >> >> To unsubscribe from this list: send the line "unsubscribe kvm" in >> >> the body of a message to majordomo@vger.kernel.org >> >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> > >> > > >
On Thu, Mar 25, 2010 at 10:35 AM, Avi Kivity <avi@redhat.com> wrote: > On 03/25/2010 06:24 PM, Cam Macdonell wrote: >> >>> There is now a generic PCI 2.3 driver that can handle all PCI devices. >>> It >>> doesn't support MSI, but if we add MSI support then it can be used >>> without >>> the need for a specialized driver. >>> >> >> Agreed, I'd be happy to use the generic driver if MSI is there. What >> would MSI support for UIO look like? An array of "struct uio_irq" for >> the different vectors? >> > > I'm not familiar with the uio internals, but for the interface, an ioctl() > on the fd to assign an eventfd to an MSI vector. Similar to ioeventfd, but > instead of mapping a doorbell to an eventfd, it maps a real MSI to an > eventfd. uio will never support ioctls. Maybe irqcontrol could be extended? > > That would be very useful for device assignment: we can pick up a uio > device, map its vectors, and give them to a guest. > > > -- > error compiling committee.c: too many arguments to function > >
On 03/26/2010 07:14 PM, Cam Macdonell wrote: > >> I'm not familiar with the uio internals, but for the interface, an ioctl() >> on the fd to assign an eventfd to an MSI vector. Similar to ioeventfd, but >> instead of mapping a doorbell to an eventfd, it maps a real MSI to an >> eventfd. >> > uio will never support ioctls. Why not? > Maybe irqcontrol could be extended? > What's irqcontrol?
On Sat, Mar 27, 2010 at 08:48:34PM +0300, Avi Kivity wrote: > On 03/26/2010 07:14 PM, Cam Macdonell wrote: >> >>> I'm not familiar with the uio internals, but for the interface, an ioctl() >>> on the fd to assign an eventfd to an MSI vector. Similar to ioeventfd, but >>> instead of mapping a doorbell to an eventfd, it maps a real MSI to an >>> eventfd. >>> >> uio will never support ioctls. > > Why not? > >> Maybe irqcontrol could be extended? >> > > What's irqcontrol? uio accepts 32 bit writes to the char device file. We can encode the fd number there, and use the high bit to signal assign/deassign. > -- > Do not meddle in the internals of kernels, for they are subtle and quick to panic.
On 03/28/2010 10:47 AM, Michael S. Tsirkin wrote: >> >>> Maybe irqcontrol could be extended? >>> >>> >> What's irqcontrol? >> > uio accepts 32 bit writes to the char device file. We can encode > the fd number there, and use the high bit to signal assign/deassign. > Ugh. Very unexpandable.
On Sun, Mar 28, 2010 at 11:02:11AM +0300, Avi Kivity wrote: > On 03/28/2010 10:47 AM, Michael S. Tsirkin wrote: >>> >>>> Maybe irqcontrol could be extended? >>>> >>>> >>> What's irqcontrol? >>> >> uio accepts 32 bit writes to the char device file. We can encode >> the fd number there, and use the high bit to signal assign/deassign. >> > > Ugh. Very unexpandable. It currently fails on any non-4 byte write. So if we need more bits in the future we can always teach it about e.g. 8 byte writes. Do you think it's worth it doing it now already, and using 8 byte writes for msi mapping? > -- > error compiling committee.c: too many arguments to function
On 03/28/2010 12:40 PM, Michael S. Tsirkin wrote: >>> uio accepts 32 bit writes to the char device file. We can encode >>> the fd number there, and use the high bit to signal assign/deassign. >>> >>> >> Ugh. Very unexpandable. >> > It currently fails on any non-4 byte write. > So if we need more bits in the future we can always teach it > about e.g. 8 byte writes. > > Do you think it's worth it doing it now already, and using > 8 byte writes for msi mapping? > Aren't ioctls a lot simpler? Multiplexing multiple functions on write()s is just ioctls done uglier.
On Sun, Mar 28, 2010 at 12:45:02PM +0300, Avi Kivity wrote: > On 03/28/2010 12:40 PM, Michael S. Tsirkin wrote: >>>> uio accepts 32 bit writes to the char device file. We can encode >>>> the fd number there, and use the high bit to signal assign/deassign. >>>> >>>> >>> Ugh. Very unexpandable. >>> >> It currently fails on any non-4 byte write. >> So if we need more bits in the future we can always teach it >> about e.g. 8 byte writes. >> >> Do you think it's worth it doing it now already, and using >> 8 byte writes for msi mapping? >> > > Aren't ioctls a lot simpler? > > Multiplexing multiple functions on write()s is just ioctls done uglier. I don't have an opinion here. Writes do have an advantage that strace can show the buffer content without being patched. Further, something along the lines proposed above means that we do not need to depend in a system header to get the functionality. > -- > error compiling committee.c: too many arguments to function
On 03/28/2010 01:31 PM, Michael S. Tsirkin wrote: > >> Aren't ioctls a lot simpler? >> >> Multiplexing multiple functions on write()s is just ioctls done uglier. >> > I don't have an opinion here. > > Writes do have an advantage that strace can show the buffer > content without being patched. > ioctls encode the buffer size into the ioctl number, so in theory strace doesn't need to be taught about an ioctl to show its buffer. > Further, something along the lines proposed above means that > we do not need to depend in a system header to get > the functionality. > Yes, point users at the code and let them figure out how to do stuff.
Avi Kivity wrote: > ioctls encode the buffer size into the ioctl number, so in theory strace > doesn't need to be taught about an ioctl to show its buffer. Unfortunately ioctl numbers don't always follow that rule :-( But maybe that's just awful proprietary drivers that I've seen. Anyway, strace should be taught how to read kernel headers to get ioctl types ;-) -- Jamie
On Sun, 28 Mar 2010, Jamie Lokier wrote: > Avi Kivity wrote: > > ioctls encode the buffer size into the ioctl number, so in theory strace > > doesn't need to be taught about an ioctl to show its buffer. > > Unfortunately ioctl numbers don't always follow that rule :-( It's not a rule to begin with, since there's no way to enforce it. > But maybe that's just awful proprietary drivers that I've seen. > > Anyway, strace should be taught how to read kernel headers to get > ioctl types ;-) > > -- Jamie > >
On Sat, Mar 27, 2010 at 11:48 AM, Avi Kivity <avi@redhat.com> wrote: > On 03/26/2010 07:14 PM, Cam Macdonell wrote: >> >>> I'm not familiar with the uio internals, but for the interface, an >>> ioctl() >>> on the fd to assign an eventfd to an MSI vector. Similar to ioeventfd, >>> but >>> instead of mapping a doorbell to an eventfd, it maps a real MSI to an >>> eventfd. >>> >> >> uio will never support ioctls. > > Why not? Perhaps I spoke too strongly, but it was rejected before http://thread.gmane.org/gmane.linux.kernel/756481 With a compelling case perhaps it could be added.
On 03/28/2010 10:48 PM, Cam Macdonell wrote: > On Sat, Mar 27, 2010 at 11:48 AM, Avi Kivity<avi@redhat.com> wrote: > >> On 03/26/2010 07:14 PM, Cam Macdonell wrote: >> >>> >>>> I'm not familiar with the uio internals, but for the interface, an >>>> ioctl() >>>> on the fd to assign an eventfd to an MSI vector. Similar to ioeventfd, >>>> but >>>> instead of mapping a doorbell to an eventfd, it maps a real MSI to an >>>> eventfd. >>>> >>>> >>> uio will never support ioctls. >>> >> Why not? >> > Perhaps I spoke too strongly, but it was rejected before > > http://thread.gmane.org/gmane.linux.kernel/756481 > > With a compelling case perhaps it could be added. > Ah, the usual "ioctls are ugly, go away". It could be done via sysfs: $ cat /sys/.../msix/max-interrupts 256 $ echo 4 > /sys/.../msix/allocate $ # subdirectories 0 1 2 3 magically appear $ # bind fd 13 to msix $ echo 13 > /sys/.../msix/2/bind-fd $ # from now on, msix interrupt 2 will call eventfd_signal() on fd 13 Call me old fashioned, but I prefer ioctls.
On Mon, Mar 29, 2010 at 2:59 PM, Avi Kivity <avi@redhat.com> wrote: > On 03/28/2010 10:48 PM, Cam Macdonell wrote: >> >> On Sat, Mar 27, 2010 at 11:48 AM, Avi Kivity<avi@redhat.com> wrote: >> >>> >>> On 03/26/2010 07:14 PM, Cam Macdonell wrote: >>> >>>> >>>> >>>>> >>>>> I'm not familiar with the uio internals, but for the interface, an >>>>> ioctl() >>>>> on the fd to assign an eventfd to an MSI vector. Similar to ioeventfd, >>>>> but >>>>> instead of mapping a doorbell to an eventfd, it maps a real MSI to an >>>>> eventfd. >>>>> >>>>> >>>> >>>> uio will never support ioctls. >>>> >>> >>> Why not? >>> >> >> Perhaps I spoke too strongly, but it was rejected before >> >> http://thread.gmane.org/gmane.linux.kernel/756481 >> >> With a compelling case perhaps it could be added. >> > > Ah, the usual "ioctls are ugly, go away". > > It could be done via sysfs: > > $ cat /sys/.../msix/max-interrupts > 256 > $ echo 4 > /sys/.../msix/allocate > $ # subdirectories 0 1 2 3 magically appear > $ # bind fd 13 to msix > $ echo 13 > /sys/.../msix/2/bind-fd > $ # from now on, msix interrupt 2 will call eventfd_signal() on fd 13 > > Call me old fashioned, but I prefer ioctls. Good point. iiuc, the goal relative to ioctls in UIO was to not have device drivers creating their own device-specific ABIs and drivers that are just massive switch statements. Having ioctls that support functions for UIO in general, such as pairing msi vectors to eventfds, does not go against that goal. > > -- > Do not meddle in the internals of kernels, for they are subtle and quick to > panic. > >
On Mon, Mar 29, 2010 at 11:59:24PM +0300, Avi Kivity wrote: > On 03/28/2010 10:48 PM, Cam Macdonell wrote: >> On Sat, Mar 27, 2010 at 11:48 AM, Avi Kivity<avi@redhat.com> wrote: >> >>> On 03/26/2010 07:14 PM, Cam Macdonell wrote: >>> >>>> >>>>> I'm not familiar with the uio internals, but for the interface, an >>>>> ioctl() >>>>> on the fd to assign an eventfd to an MSI vector. Similar to ioeventfd, >>>>> but >>>>> instead of mapping a doorbell to an eventfd, it maps a real MSI to an >>>>> eventfd. >>>>> >>>>> >>>> uio will never support ioctls. >>>> >>> Why not? >>> >> Perhaps I spoke too strongly, but it was rejected before >> >> http://thread.gmane.org/gmane.linux.kernel/756481 >> >> With a compelling case perhaps it could be added. >> > > Ah, the usual "ioctls are ugly, go away". > > It could be done via sysfs: > > $ cat /sys/.../msix/max-interrupts > 256 This one can be discovered with existing sysfs. > $ echo 4 > /sys/.../msix/allocate > $ # subdirectories 0 1 2 3 magically appear > $ # bind fd 13 to msix There's no way to know, when qemu starts, how many vectors will be used by driver. So I think we can just go ahead and allocate as many vectors as supported by device at the moment when the first eventfd is bound. > $ echo 13 > /sys/.../msix/2/bind-fd I think that this one can't work, both fd 13 and sysfs file will get closed when /bin/echo process exits. > $ # from now on, msix interrupt 2 will call eventfd_signal() on fd 13 > > Call me old fashioned, but I prefer ioctls. I think we will have to use ioctl or irqcontrol because of lifetime issues outlines above. > -- > Do not meddle in the internals of kernels, for they are subtle and quick to panic.
On 03/31/2010 12:12 PM, Michael S. Tsirkin wrote: > > >> $ echo 4> /sys/.../msix/allocate >> $ # subdirectories 0 1 2 3 magically appear >> $ # bind fd 13 to msix >> > There's no way to know, when qemu starts, how many vectors will be used > by driver. So I think we can just go ahead and allocate as many vectors > as supported by device at the moment when the first eventfd is bound. > That will cause a huge amount of vectors to be allocated. It's better to do this dynamically in response to guest programming. >> $ echo 13> /sys/.../msix/2/bind-fd >> > I think that this one can't work, both fd 13 and sysfs file will get > closed when /bin/echo process exits. > I meant figuratively. It's pointless to bind an eventfd from a shell. You'd use fprintf() from qemu to bind the eventfd. >> $ # from now on, msix interrupt 2 will call eventfd_signal() on fd 13 >> >> Call me old fashioned, but I prefer ioctls. >> > I think we will have to use ioctl or irqcontrol because of lifetime > issues outlines above. > The lifetime issues don't exist if you do all that from qemu. That's not to say I prefer sysfs to ioctl. What's irqcontrol?
On 03/30/2010 05:52 PM, Cam Macdonell wrote: > >> Ah, the usual "ioctls are ugly, go away". >> >> It could be done via sysfs: >> >> $ cat /sys/.../msix/max-interrupts >> 256 >> $ echo 4> /sys/.../msix/allocate >> $ # subdirectories 0 1 2 3 magically appear >> $ # bind fd 13 to msix >> $ echo 13> /sys/.../msix/2/bind-fd >> $ # from now on, msix interrupt 2 will call eventfd_signal() on fd 13 >> >> Call me old fashioned, but I prefer ioctls. >> > Good point. iiuc, the goal relative to ioctls in UIO was to not have > device drivers creating their own device-specific ABIs and drivers > that are just massive switch statements. Having ioctls that support > functions for UIO in general, such as pairing msi vectors to eventfds, > does not go against that goal. > Device specific ioctls are clearly a bad idea for uio.
On Thu, Apr 01, 2010 at 11:58:29AM +0300, Avi Kivity wrote: > On 03/31/2010 12:12 PM, Michael S. Tsirkin wrote: >> >> >>> $ echo 4> /sys/.../msix/allocate >>> $ # subdirectories 0 1 2 3 magically appear >>> $ # bind fd 13 to msix >>> >> There's no way to know, when qemu starts, how many vectors will be used >> by driver. So I think we can just go ahead and allocate as many vectors >> as supported by device at the moment when the first eventfd is bound. >> > > That will cause a huge amount of vectors to be allocated. It's better > to do this dynamically in response to guest programming. guest unmasks vectors one by one. Linux does not have an API to allocate vectors one by one now. >>> $ echo 13> /sys/.../msix/2/bind-fd >>> >> I think that this one can't work, both fd 13 and sysfs file will get >> closed when /bin/echo process exits. >> > > I meant figuratively. It's pointless to bind an eventfd from a shell. > You'd use fprintf() from qemu to bind the eventfd. > >>> $ # from now on, msix interrupt 2 will call eventfd_signal() on fd 13 >>> >>> Call me old fashioned, but I prefer ioctls. >>> >> I think we will have to use ioctl or irqcontrol because of lifetime >> issues outlines above. >> > > The lifetime issues don't exist if you do all that from qemu. That's > not to say I prefer sysfs to ioctl. > > What's irqcontrol? uio core accepts 32 bit writes and passes the value written as int to an irqcontrol callback in the device. > -- > error compiling committee.c: too many arguments to function
On 04/01/2010 01:59 PM, Michael S. Tsirkin wrote: > On Thu, Apr 01, 2010 at 11:58:29AM +0300, Avi Kivity wrote: > >> On 03/31/2010 12:12 PM, Michael S. Tsirkin wrote: >> >>> >>> >>>> $ echo 4> /sys/.../msix/allocate >>>> $ # subdirectories 0 1 2 3 magically appear >>>> $ # bind fd 13 to msix >>>> >>>> >>> There's no way to know, when qemu starts, how many vectors will be used >>> by driver. So I think we can just go ahead and allocate as many vectors >>> as supported by device at the moment when the first eventfd is bound. >>> >>> >> That will cause a huge amount of vectors to be allocated. It's better >> to do this dynamically in response to guest programming. >> > guest unmasks vectors one by one. > Linux does not have an API to allocate vectors one by one now. > Well, maybe it should. I'm worried that the guest could exhaust host irqs if we allocate the maximum amount. >> What's irqcontrol? >> > uio core accepts 32 bit writes and passes the value written > as int to an irqcontrol callback in the device. > I see. In this case I withdraw my earlier objection about using write() to control eventfd binding, as it's clearly interrupt related. I still prefer an explicit ioctl though. I think you suggested to allow irqcontrol to support >4 byte writes, that should also work.
diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig index 1da73ec..b92cded 100644 --- a/drivers/uio/Kconfig +++ b/drivers/uio/Kconfig @@ -74,6 +74,14 @@ config UIO_SERCOS3 If you compile this as a module, it will be called uio_sercos3. +config UIO_IVSHMEM + tristate "KVM shared memory PCI driver" + default n + help + Userspace I/O interface for the KVM shared memory device. This + driver will make available two memory regions, the first is + registers and the second is a region for sharing between VMs. + config UIO_PCI_GENERIC tristate "Generic driver for PCI 2.3 and PCI Express cards" depends on PCI diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile index 18fd818..25c1ca5 100644 --- a/drivers/uio/Makefile +++ b/drivers/uio/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_UIO_AEC) += uio_aec.o obj-$(CONFIG_UIO_SERCOS3) += uio_sercos3.o obj-$(CONFIG_UIO_PCI_GENERIC) += uio_pci_generic.o obj-$(CONFIG_UIO_NETX) += uio_netx.o +obj-$(CONFIG_UIO_IVSHMEM) += uio_ivshmem.o diff --git a/drivers/uio/uio_ivshmem.c b/drivers/uio/uio_ivshmem.c new file mode 100644 index 0000000..607435b --- /dev/null +++ b/drivers/uio/uio_ivshmem.c @@ -0,0 +1,235 @@ +/* + * UIO IVShmem Driver + * + * (C) 2009 Cam Macdonell + * based on Hilscher CIF card driver (C) 2007 Hans J. Koch <hjk@linutronix.de> + * + * Licensed under GPL version 2 only. + * + */ + +#include <linux/device.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/uio_driver.h> + +#include <asm/io.h> + +#define IntrStatus 0x04 +#define IntrMask 0x00 + +struct ivshmem_info { + struct uio_info *uio; + struct pci_dev *dev; + char (*msix_names)[256]; + struct msix_entry *msix_entries; + int nvectors; +}; + +static irqreturn_t ivshmem_handler(int irq, struct uio_info *dev_info) +{ + + void __iomem *plx_intscr = dev_info->mem[0].internal_addr + + IntrStatus; + u32 val; + + val = readl(plx_intscr); + if (val == 0) + return IRQ_NONE; + + printk(KERN_INFO "Regular interrupt (val = %d)\n", val); + return IRQ_HANDLED; +} + +static irqreturn_t ivshmem_msix_handler(int irq, void *opaque) +{ + + struct uio_info * dev_info = (struct uio_info *) opaque; + + /* we have to do this explicitly when using MSI-X */ + uio_event_notify(dev_info); + printk(KERN_INFO "MSI-X interrupt (%d)\n", irq); + return IRQ_HANDLED; + +} + +static int request_msix_vectors(struct ivshmem_info *ivs_info, int nvectors) +{ + int i, err; + const char *name = "ivshmem"; + + printk(KERN_INFO "devname is %s\n", name); + ivs_info->nvectors = nvectors; + + + ivs_info->msix_entries = kmalloc(nvectors * sizeof *ivs_info->msix_entries, + GFP_KERNEL); + ivs_info->msix_names = kmalloc(nvectors * sizeof *ivs_info->msix_names, + GFP_KERNEL); + + for (i = 0; i < nvectors; ++i) + ivs_info->msix_entries[i].entry = i; + + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, + ivs_info->nvectors); + if (err > 0) { + ivs_info->nvectors = err; /* msi-x positive error code + returns the number available*/ + err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries, + ivs_info->nvectors); + if (err > 0) { + printk(KERN_INFO "no MSI (%d). Back to INTx.\n", err); + return -ENOSPC; + } + } + + printk(KERN_INFO "err is %d\n", err); + if (err) return err; + + for (i = 0; i < ivs_info->nvectors; i++) { + + snprintf(ivs_info->msix_names[i], sizeof *ivs_info->msix_names, + "%s-config", name); + + ivs_info->msix_entries[i].entry = i; + err = request_irq(ivs_info->msix_entries[i].vector, + ivshmem_msix_handler, 0, + ivs_info->msix_names[i], ivs_info->uio); + + if (err) { + return -ENOSPC; + } + } + + return 0; +} + +static int __devinit ivshmem_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct uio_info *info; + struct ivshmem_info * ivshmem_info; + int nvectors = 4; + + info = kzalloc(sizeof(struct uio_info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + ivshmem_info = kzalloc(sizeof(struct ivshmem_info), GFP_KERNEL); + if (!ivshmem_info) { + kfree(info); + return -ENOMEM; + } + + if (pci_enable_device(dev)) + goto out_free; + + if (pci_request_regions(dev, "ivshmem")) + goto out_disable; + + info->mem[0].addr = pci_resource_start(dev, 0); + if (!info->mem[0].addr) + goto out_release; + + info->mem[0].size = pci_resource_len(dev, 0); + info->mem[0].internal_addr = pci_ioremap_bar(dev, 0); + if (!info->mem[0].internal_addr) { + printk(KERN_INFO "got a null"); + goto out_release; + } + + info->mem[0].memtype = UIO_MEM_PHYS; + + info->mem[1].addr = pci_resource_start(dev, 2); + if (!info->mem[1].addr) + goto out_unmap; + info->mem[1].internal_addr = pci_ioremap_bar(dev, 2); + if (!info->mem[1].internal_addr) + goto out_unmap; + + info->mem[1].size = pci_resource_len(dev, 2); + info->mem[1].memtype = UIO_MEM_PHYS; + + ivshmem_info->uio = info; + ivshmem_info->dev = dev; + + if (request_msix_vectors(ivshmem_info, nvectors) != 0) { + printk(KERN_INFO "regular IRQs\n"); + info->irq = dev->irq; + info->irq_flags = IRQF_SHARED; + info->handler = ivshmem_handler; + writel(0xffffffff, info->mem[0].internal_addr + IntrMask); + } else { + printk(KERN_INFO "MSI-X enabled\n"); + info->irq = -1; + } + + info->name = "ivshmem"; + info->version = "0.0.1"; + + if (uio_register_device(&dev->dev, info)) + goto out_unmap2; + + pci_set_drvdata(dev, info); + + + return 0; +out_unmap2: + iounmap(info->mem[2].internal_addr); +out_unmap: + iounmap(info->mem[0].internal_addr); +out_release: + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_free: + kfree (info); + return -ENODEV; +} + +static void ivshmem_pci_remove(struct pci_dev *dev) +{ + struct uio_info *info = pci_get_drvdata(dev); + + uio_unregister_device(info); + pci_release_regions(dev); + pci_disable_device(dev); + pci_set_drvdata(dev, NULL); + iounmap(info->mem[0].internal_addr); + + kfree (info); +} + +static struct pci_device_id ivshmem_pci_ids[] __devinitdata = { + { + .vendor = 0x1af4, + .device = 0x1110, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + }, + { 0, } +}; + +static struct pci_driver ivshmem_pci_driver = { + .name = "uio_ivshmem", + .id_table = ivshmem_pci_ids, + .probe = ivshmem_pci_probe, + .remove = ivshmem_pci_remove, +}; + +static int __init ivshmem_init_module(void) +{ + return pci_register_driver(&ivshmem_pci_driver); +} + +static void __exit ivshmem_exit_module(void) +{ + pci_unregister_driver(&ivshmem_pci_driver); +} + +module_init(ivshmem_init_module); +module_exit(ivshmem_exit_module); + +MODULE_DEVICE_TABLE(pci, ivshmem_pci_ids); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Cam Macdonell");