Message ID | 20100223205225.14860.50990.stgit@st-brides.cs.ualberta.ca |
---|---|
State | New |
Headers | show |
Hi Cam, On 02/23/2010 02:52 PM, Cam Macdonell wrote: > Support for passing memory regions via VirtIO to remove need for PCI > support in the guest. > > Adds new vectors to VirtIO config space to pass memory regions similar > to how virtqueues are passed. > > Kernel patch is forthcoming that add device_ops to access the memory regions. > > I have used this mechanism to implement my host shared memory implementation > and modified Alex's Virtio FB to use it as well. > Virtio is really a DMA engine. One of the nice things about it's design is that you can do things like transparent bounce buffering if needed. Adding a mechanism like this breaks this abstract and turns virtio into something that's more than I think it should be. For guest shared memory, what makes sense to me is to make use of uio_pci within a guest to map bars in userspace. The device can have the first bar map a small MMIO region that is tied to a ioeventfd with KVM. With just qemu, it can be tied to a normal eventfd. You can use irqfd with KVM to tie an eventfd to the PCI irq. Likewise, you can use a normal eventfd to emulate that. I'd suggest using a /dev/shm file to act as the shared memory segment. You'll need a little magic within qemu_ram_alloc() to use this (maybe qemu_ram_alloc() should take a context parameter so this can be setup). You'll need to setup the backend with the name of the shared memory segment and the two eventfds. Also, a user needs to specify a pci vendor/device id. It's probably worth having some kind of identifier tag in the PCI device's first MMIO region. I think there's a couple important characteristics of an implementation like this. By using a uio_pci, you can implement shared memory in userspace but you can also implement a proper kernel driver if you so desired for a specific device. Regards, Anthony Liguori > Comments are welcome, > > Thanks, > Cam > --- > hw/virtio-pci.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++---- > hw/virtio.c | 38 ++++++++++++++++++++++++++++++++++ > hw/virtio.h | 22 ++++++++++++++++++++ > 3 files changed, 117 insertions(+), 4 deletions(-) > > diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c > index f3373ae..e71bf64 100644 > --- a/hw/virtio-pci.c > +++ b/hw/virtio-pci.c > @@ -55,13 +55,13 @@ > > /* MSI-X registers: only enabled if MSI-X is enabled. */ > /* A 16-bit vector for configuration changes. */ > -#define VIRTIO_MSI_CONFIG_VECTOR 20 > +#define VIRTIO_MSI_CONFIG_VECTOR 32 > /* A 16-bit vector for selected queue notifications. */ > -#define VIRTIO_MSI_QUEUE_VECTOR 22 > +#define VIRTIO_MSI_QUEUE_VECTOR 34 > > /* Config space size */ > -#define VIRTIO_PCI_CONFIG_NOMSI 20 > -#define VIRTIO_PCI_CONFIG_MSI 24 > +#define VIRTIO_PCI_CONFIG_NOMSI 32 > +#define VIRTIO_PCI_CONFIG_MSI 36 > #define VIRTIO_PCI_REGION_SIZE(dev) (msix_present(dev) ? \ > VIRTIO_PCI_CONFIG_MSI : \ > VIRTIO_PCI_CONFIG_NOMSI) > @@ -72,6 +72,12 @@ > VIRTIO_PCI_CONFIG_MSI : \ > VIRTIO_PCI_CONFIG_NOMSI) > > +#define VIRTIO_PCI_MEM_SEL 20 > + > +#define VIRTIO_PCI_MEM_SIZE 24 > + > +#define VIRTIO_PCI_MEM_PFN 28 > + > /* Virtio ABI version, if we increment this, we break the guest driver. */ > #define VIRTIO_PCI_ABI_VERSION 0 > > @@ -227,6 +233,11 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) > val = VIRTIO_NO_VECTOR; > virtio_queue_set_vector(vdev, vdev->queue_sel, val); > break; > + case VIRTIO_PCI_MEM_SEL: > + fprintf(stderr, "%s: VIRTIO_PCI_MEM_SEL %d\n", __func__, val); > + if (val< VIRTIO_PCI_MEM_MAX) > + vdev->mem_sel = val; > + break; > default: > fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n", > __func__, addr, val); > @@ -271,6 +282,14 @@ static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr) > case VIRTIO_MSI_QUEUE_VECTOR: > ret = virtio_queue_vector(vdev, vdev->queue_sel); > break; > + case VIRTIO_PCI_MEM_SIZE: > + ret = virtio_mem_get_size(vdev, vdev->mem_sel); > + break; > + case VIRTIO_PCI_MEM_PFN: > + ret = virtio_mem_get_addr(vdev, vdev->mem_sel) > +>> VIRTIO_PCI_QUEUE_ADDR_SHIFT; > + break; > + > default: > break; > } > @@ -370,6 +389,30 @@ static void virtio_map(PCIDevice *pci_dev, int region_num, > vdev->get_config(vdev, vdev->config); > } > > + > +static void virtio_setup_mem(PCIDevice *d, int region_num, > + pcibus_t addr, pcibus_t size, int type) > +{ > + VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, d); > + VirtIODevice *vdev; > + > + printf("XXX setmem to %#x - %#x\n", (uint32_t)addr, (uint32_t)(addr + size)); > + vdev = proxy->vdev; > + > + cpu_register_physical_memory(addr, size, IO_MEM_UNASSIGNED); > + cpu_register_physical_memory(addr, size, vdev->vmem[region_num-2].ram_addr | IO_MEM_RAM); > + > + /* store the address of the region and get a pointer for it > + * that can access the mem region */ > + vdev->vmem[region_num-2].guest_phys_addr = (target_phys_addr_t)addr; > + > + if (vdev->vmem[region_num-2].on_map) { > + vdev->vmem[region_num-2].on_map(vdev, addr, size); > + } > + return; > + > +} > + > static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, > uint32_t val, int len) > { > @@ -406,6 +449,7 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev, > { > uint8_t *config; > uint32_t size; > + int i; > > proxy->vdev = vdev; > > @@ -426,6 +470,15 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev, > > config[0x3d] = 1; > > + // only BARs that were added with virtio_add_memory will be allocated here > + for (i = 0; i< VIRTIO_PCI_QUEUE_MAX; i++) { > + if (vdev->vmem[i].size> 0) { > + pci_register_bar(&proxy->pci_dev, 2 + i, vdev->vmem[i].size, > + PCI_BASE_ADDRESS_SPACE_MEMORY, virtio_setup_mem); > + } > + } > + > + > if (vdev->nvectors&& !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) { > pci_register_bar(&proxy->pci_dev, 1, > msix_bar_size(&proxy->pci_dev), > diff --git a/hw/virtio.c b/hw/virtio.c > index 7c020a3..1f7924e 100644 > --- a/hw/virtio.c > +++ b/hw/virtio.c > @@ -549,6 +549,16 @@ target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n) > return vdev->vq[n].pa; > } > > +target_phys_addr_t virtio_mem_get_addr(VirtIODevice *vdev, int n) > +{ > + return vdev->vmem[n].guest_phys_addr; > +} > + > +int virtio_mem_get_size(VirtIODevice *vdev, int n) > +{ > + return vdev->vmem[n].size; > +} > + > int virtio_queue_get_num(VirtIODevice *vdev, int n) > { > return vdev->vq[n].vring.num; > @@ -592,6 +602,33 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, > return&vdev->vq[i]; > } > > +VirtMem *virtio_add_memory(VirtIODevice *vdev, int mem_size, > + void (*handle_access)(VirtIODevice *, > + target_phys_addr_t, uint32_t)) > +{ > + int i; > + > + /* trace what virtio_fb_setmem does */ > + > + for (i = 0; i< VIRTIO_PCI_MEM_MAX; i++) { > + if (vdev->vmem[i].size == 0) { > + /* XXX: check that memory is a power of 2 */ > + vdev->vmem[i].size = mem_size * 1024 * 1024; /* presume size is in MBs*/ > + vdev->vmem[i].ram_addr = qemu_ram_alloc(vdev->vmem[i].size); > + vdev->vmem[i].host_ptr = qemu_get_ram_ptr(vdev->vmem[i].ram_addr); > + vdev->vmem[i].on_map = handle_access; > + break; > + } > + } > + > + return&vdev->vmem[i]; > +} > + > +void * virtio_get_memory_ptr(VirtMem * vmem) > +{ > + return vmem->host_ptr; > +} > + > void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) > { > /* Always notify when queue is empty (when feature acknowledge) */ > @@ -714,6 +751,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id, > vdev->queue_sel = 0; > vdev->config_vector = VIRTIO_NO_VECTOR; > vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); > + vdev->vmem = qemu_mallocz(sizeof(VirtMem) * VIRTIO_PCI_MEM_MAX); > for(i = 0; i< VIRTIO_PCI_QUEUE_MAX; i++) > vdev->vq[i].vector = VIRTIO_NO_VECTOR; > > diff --git a/hw/virtio.h b/hw/virtio.h > index 3baa2a3..02beb27 100644 > --- a/hw/virtio.h > +++ b/hw/virtio.h > @@ -82,6 +82,15 @@ typedef struct VirtQueueElement > struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; > } VirtQueueElement; > > +typedef struct VirtMem > +{ > + target_phys_addr_t guest_phys_addr; > + ram_addr_t ram_addr; > + int size; > + uint8_t * host_ptr; > + void (*on_map)(VirtIODevice *vdev, target_phys_addr_t, uint32_t); > +} VirtMem; > + > typedef struct { > void (*notify)(void * opaque, uint16_t vector); > void (*save_config)(void * opaque, QEMUFile *f); > @@ -93,6 +102,8 @@ typedef struct { > > #define VIRTIO_PCI_QUEUE_MAX 64 > > +#define VIRTIO_PCI_MEM_MAX 16 > + > #define VIRTIO_NO_VECTOR 0xffff > > struct VirtIODevice > @@ -101,6 +112,7 @@ struct VirtIODevice > uint8_t status; > uint8_t isr; > uint16_t queue_sel; > + uint16_t mem_sel; > uint32_t guest_features; > size_t config_len; > void *config; > @@ -113,6 +125,7 @@ struct VirtIODevice > void (*set_config)(VirtIODevice *vdev, const uint8_t *config); > void (*reset)(VirtIODevice *vdev); > VirtQueue *vq; > + VirtMem * vmem; > const VirtIOBindings *binding; > void *binding_opaque; > uint16_t device_id; > @@ -122,6 +135,12 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, > void (*handle_output)(VirtIODevice *, > VirtQueue *)); > > +void * virtio_get_memory_ptr(VirtMem * vmem); > +VirtMem *virtio_add_memory(VirtIODevice *vdev, int mem_size, > + void (*handle_access)(VirtIODevice *, > + target_phys_addr_t, > + uint32_t)); > + > void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, > unsigned int len); > void virtqueue_flush(VirtQueue *vq, unsigned int count); > @@ -169,6 +188,9 @@ void virtio_update_irq(VirtIODevice *vdev); > void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding, > void *opaque); > > +target_phys_addr_t virtio_mem_get_addr(VirtIODevice *vdev, int n); > +int virtio_mem_get_size(VirtIODevice *vdev, int n); > + > /* Base devices. */ > VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf); > VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf); > > >
On 02/23/2010 03:05 PM, Anthony Liguori wrote: > Hi Cam, > > On 02/23/2010 02:52 PM, Cam Macdonell wrote: >> Support for passing memory regions via VirtIO to remove need for PCI >> support in the guest. >> >> Adds new vectors to VirtIO config space to pass memory regions similar >> to how virtqueues are passed. >> >> Kernel patch is forthcoming that add device_ops to access the memory >> regions. >> >> I have used this mechanism to implement my host shared memory >> implementation >> and modified Alex's Virtio FB to use it as well. > > Virtio is really a DMA engine. One of the nice things about it's > design is that you can do things like transparent bounce buffering if > needed. Adding a mechanism like this breaks this abstract and turns > virtio into something that's more than I think it should be. More specifically, virtio does not assume cache coherent shared memory today. While we assume this in the virtio-pci vring implementation, that's just an implementation detail of one transport. Adding generic shared memory to virtio presumes that one can have cache coherent shared memory in any virtio transport which is not a good assumption to make. Regards, Anthony Liguori
On Tue, Feb 23, 2010 at 2:05 PM, Anthony Liguori <anthony@codemonkey.ws> wrote: > Hi Cam, > > On 02/23/2010 02:52 PM, Cam Macdonell wrote: >> >> Support for passing memory regions via VirtIO to remove need for PCI >> support in the guest. >> >> Adds new vectors to VirtIO config space to pass memory regions similar >> to how virtqueues are passed. >> >> Kernel patch is forthcoming that add device_ops to access the memory >> regions. >> >> I have used this mechanism to implement my host shared memory >> implementation >> and modified Alex's Virtio FB to use it as well. >> > > Virtio is really a DMA engine. One of the nice things about it's design is > that you can do things like transparent bounce buffering if needed. Adding > a mechanism like this breaks this abstract and turns virtio into something > that's more than I think it should be. > > For guest shared memory, what makes sense to me is to make use of uio_pci > within a guest to map bars in userspace. The device can have the first bar > map a small MMIO region that is tied to a ioeventfd with KVM. With just > qemu, it can be tied to a normal eventfd. You can use irqfd with KVM to tie > an eventfd to the PCI irq. Likewise, you can use a normal eventfd to > emulate that. I'd suggest using a /dev/shm file to act as the shared memory > segment. You'll need a little magic within qemu_ram_alloc() to use this > (maybe qemu_ram_alloc() should take a context parameter so this can be > setup). Part of my goal with this patch was to support systems without PCI. My previous patch http://www.mail-archive.com/kvm@vger.kernel.org/msg14284.html is a PCI implementation that uses eventfds (no ioeventfds yet) to support interrupts between guests and a /dev/shm file to share between guests. I'm not sure what you're suggesting the interrupts and eventfds would be used for if the mapping is initiated by the guest in the case. To be clear, this patch is not for shared memory per se, but adds memory regions that have multiple uses and that doesn't rely on PCI to access them. For example, I have modified Alex's virtio frame buffer as a test of this patch of a use that doesn't share memory between guests. But my interest particularly is to share memory using VirtIO. In the response to my initial PCI implementation, you had suggested using VirtIO, but the discussion around virtio not being ideal for importing memory (from another guest or the host) led to Christian's suggestion of the new device_ops which I ran with (slowly). > > You'll need to setup the backend with the name of the shared memory segment > and the two eventfds. Also, a user needs to specify a pci vendor/device id. > It's probably worth having some kind of identifier tag in the PCI device's > first MMIO region. > > I think there's a couple important characteristics of an implementation like > this. By using a uio_pci, you can implement shared memory in userspace but > you can also implement a proper kernel driver if you so desired for a > specific device. Agreed, you had suggested uio_pci for my PCI driver and I'd be happy to work on that if we can agree on what the underlying device looks like whether it should be VirtIO or just PCI. Thanks for the feedback, Cam > > Regards, > > Anthony Liguori > >> Comments are welcome, >> >> Thanks, >> Cam >> --- >> hw/virtio-pci.c | 61 >> +++++++++++++++++++++++++++++++++++++++++++++++++++---- >> hw/virtio.c | 38 ++++++++++++++++++++++++++++++++++ >> hw/virtio.h | 22 ++++++++++++++++++++ >> 3 files changed, 117 insertions(+), 4 deletions(-) >> >> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c >> index f3373ae..e71bf64 100644 >> --- a/hw/virtio-pci.c >> +++ b/hw/virtio-pci.c >> @@ -55,13 +55,13 @@ >> >> /* MSI-X registers: only enabled if MSI-X is enabled. */ >> /* A 16-bit vector for configuration changes. */ >> -#define VIRTIO_MSI_CONFIG_VECTOR 20 >> +#define VIRTIO_MSI_CONFIG_VECTOR 32 >> /* A 16-bit vector for selected queue notifications. */ >> -#define VIRTIO_MSI_QUEUE_VECTOR 22 >> +#define VIRTIO_MSI_QUEUE_VECTOR 34 >> >> /* Config space size */ >> -#define VIRTIO_PCI_CONFIG_NOMSI 20 >> -#define VIRTIO_PCI_CONFIG_MSI 24 >> +#define VIRTIO_PCI_CONFIG_NOMSI 32 >> +#define VIRTIO_PCI_CONFIG_MSI 36 >> #define VIRTIO_PCI_REGION_SIZE(dev) (msix_present(dev) ? \ >> VIRTIO_PCI_CONFIG_MSI : \ >> VIRTIO_PCI_CONFIG_NOMSI) >> @@ -72,6 +72,12 @@ >> VIRTIO_PCI_CONFIG_MSI : \ >> VIRTIO_PCI_CONFIG_NOMSI) >> >> +#define VIRTIO_PCI_MEM_SEL 20 >> + >> +#define VIRTIO_PCI_MEM_SIZE 24 >> + >> +#define VIRTIO_PCI_MEM_PFN 28 >> + >> /* Virtio ABI version, if we increment this, we break the guest driver. >> */ >> #define VIRTIO_PCI_ABI_VERSION 0 >> >> @@ -227,6 +233,11 @@ static void virtio_ioport_write(void *opaque, >> uint32_t addr, uint32_t val) >> val = VIRTIO_NO_VECTOR; >> virtio_queue_set_vector(vdev, vdev->queue_sel, val); >> break; >> + case VIRTIO_PCI_MEM_SEL: >> + fprintf(stderr, "%s: VIRTIO_PCI_MEM_SEL %d\n", __func__, val); >> + if (val< VIRTIO_PCI_MEM_MAX) >> + vdev->mem_sel = val; >> + break; >> default: >> fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n", >> __func__, addr, val); >> @@ -271,6 +282,14 @@ static uint32_t virtio_ioport_read(VirtIOPCIProxy >> *proxy, uint32_t addr) >> case VIRTIO_MSI_QUEUE_VECTOR: >> ret = virtio_queue_vector(vdev, vdev->queue_sel); >> break; >> + case VIRTIO_PCI_MEM_SIZE: >> + ret = virtio_mem_get_size(vdev, vdev->mem_sel); >> + break; >> + case VIRTIO_PCI_MEM_PFN: >> + ret = virtio_mem_get_addr(vdev, vdev->mem_sel) >> +>> VIRTIO_PCI_QUEUE_ADDR_SHIFT; >> + break; >> + >> default: >> break; >> } >> @@ -370,6 +389,30 @@ static void virtio_map(PCIDevice *pci_dev, int >> region_num, >> vdev->get_config(vdev, vdev->config); >> } >> >> + >> +static void virtio_setup_mem(PCIDevice *d, int region_num, >> + pcibus_t addr, pcibus_t size, int type) >> +{ >> + VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, d); >> + VirtIODevice *vdev; >> + >> + printf("XXX setmem to %#x - %#x\n", (uint32_t)addr, (uint32_t)(addr + >> size)); >> + vdev = proxy->vdev; >> + >> + cpu_register_physical_memory(addr, size, IO_MEM_UNASSIGNED); >> + cpu_register_physical_memory(addr, size, >> vdev->vmem[region_num-2].ram_addr | IO_MEM_RAM); >> + >> + /* store the address of the region and get a pointer for it >> + * that can access the mem region */ >> + vdev->vmem[region_num-2].guest_phys_addr = (target_phys_addr_t)addr; >> + >> + if (vdev->vmem[region_num-2].on_map) { >> + vdev->vmem[region_num-2].on_map(vdev, addr, size); >> + } >> + return; >> + >> +} >> + >> static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, >> uint32_t val, int len) >> { >> @@ -406,6 +449,7 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, >> VirtIODevice *vdev, >> { >> uint8_t *config; >> uint32_t size; >> + int i; >> >> proxy->vdev = vdev; >> >> @@ -426,6 +470,15 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, >> VirtIODevice *vdev, >> >> config[0x3d] = 1; >> >> + // only BARs that were added with virtio_add_memory will be allocated >> here >> + for (i = 0; i< VIRTIO_PCI_QUEUE_MAX; i++) { >> + if (vdev->vmem[i].size> 0) { >> + pci_register_bar(&proxy->pci_dev, 2 + i, vdev->vmem[i].size, >> + PCI_BASE_ADDRESS_SPACE_MEMORY, virtio_setup_mem); >> + } >> + } >> + >> + >> if (vdev->nvectors&& !msix_init(&proxy->pci_dev, vdev->nvectors, 1, >> 0)) { >> pci_register_bar(&proxy->pci_dev, 1, >> msix_bar_size(&proxy->pci_dev), >> diff --git a/hw/virtio.c b/hw/virtio.c >> index 7c020a3..1f7924e 100644 >> --- a/hw/virtio.c >> +++ b/hw/virtio.c >> @@ -549,6 +549,16 @@ target_phys_addr_t virtio_queue_get_addr(VirtIODevice >> *vdev, int n) >> return vdev->vq[n].pa; >> } >> >> +target_phys_addr_t virtio_mem_get_addr(VirtIODevice *vdev, int n) >> +{ >> + return vdev->vmem[n].guest_phys_addr; >> +} >> + >> +int virtio_mem_get_size(VirtIODevice *vdev, int n) >> +{ >> + return vdev->vmem[n].size; >> +} >> + >> int virtio_queue_get_num(VirtIODevice *vdev, int n) >> { >> return vdev->vq[n].vring.num; >> @@ -592,6 +602,33 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int >> queue_size, >> return&vdev->vq[i]; >> } >> >> +VirtMem *virtio_add_memory(VirtIODevice *vdev, int mem_size, >> + void (*handle_access)(VirtIODevice *, >> + target_phys_addr_t, uint32_t)) >> +{ >> + int i; >> + >> + /* trace what virtio_fb_setmem does */ >> + >> + for (i = 0; i< VIRTIO_PCI_MEM_MAX; i++) { >> + if (vdev->vmem[i].size == 0) { >> + /* XXX: check that memory is a power of 2 */ >> + vdev->vmem[i].size = mem_size * 1024 * 1024; /* presume size >> is in MBs*/ >> + vdev->vmem[i].ram_addr = qemu_ram_alloc(vdev->vmem[i].size); >> + vdev->vmem[i].host_ptr = >> qemu_get_ram_ptr(vdev->vmem[i].ram_addr); >> + vdev->vmem[i].on_map = handle_access; >> + break; >> + } >> + } >> + >> + return&vdev->vmem[i]; >> +} >> + >> +void * virtio_get_memory_ptr(VirtMem * vmem) >> +{ >> + return vmem->host_ptr; >> +} >> + >> void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) >> { >> /* Always notify when queue is empty (when feature acknowledge) */ >> @@ -714,6 +751,7 @@ VirtIODevice *virtio_common_init(const char *name, >> uint16_t device_id, >> vdev->queue_sel = 0; >> vdev->config_vector = VIRTIO_NO_VECTOR; >> vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); >> + vdev->vmem = qemu_mallocz(sizeof(VirtMem) * VIRTIO_PCI_MEM_MAX); >> for(i = 0; i< VIRTIO_PCI_QUEUE_MAX; i++) >> vdev->vq[i].vector = VIRTIO_NO_VECTOR; >> >> diff --git a/hw/virtio.h b/hw/virtio.h >> index 3baa2a3..02beb27 100644 >> --- a/hw/virtio.h >> +++ b/hw/virtio.h >> @@ -82,6 +82,15 @@ typedef struct VirtQueueElement >> struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; >> } VirtQueueElement; >> >> +typedef struct VirtMem >> +{ >> + target_phys_addr_t guest_phys_addr; >> + ram_addr_t ram_addr; >> + int size; >> + uint8_t * host_ptr; >> + void (*on_map)(VirtIODevice *vdev, target_phys_addr_t, uint32_t); >> +} VirtMem; >> + >> typedef struct { >> void (*notify)(void * opaque, uint16_t vector); >> void (*save_config)(void * opaque, QEMUFile *f); >> @@ -93,6 +102,8 @@ typedef struct { >> >> #define VIRTIO_PCI_QUEUE_MAX 64 >> >> +#define VIRTIO_PCI_MEM_MAX 16 >> + >> #define VIRTIO_NO_VECTOR 0xffff >> >> struct VirtIODevice >> @@ -101,6 +112,7 @@ struct VirtIODevice >> uint8_t status; >> uint8_t isr; >> uint16_t queue_sel; >> + uint16_t mem_sel; >> uint32_t guest_features; >> size_t config_len; >> void *config; >> @@ -113,6 +125,7 @@ struct VirtIODevice >> void (*set_config)(VirtIODevice *vdev, const uint8_t *config); >> void (*reset)(VirtIODevice *vdev); >> VirtQueue *vq; >> + VirtMem * vmem; >> const VirtIOBindings *binding; >> void *binding_opaque; >> uint16_t device_id; >> @@ -122,6 +135,12 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int >> queue_size, >> void (*handle_output)(VirtIODevice *, >> VirtQueue *)); >> >> +void * virtio_get_memory_ptr(VirtMem * vmem); >> +VirtMem *virtio_add_memory(VirtIODevice *vdev, int mem_size, >> + void (*handle_access)(VirtIODevice *, >> + target_phys_addr_t, >> + uint32_t)); >> + >> void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, >> unsigned int len); >> void virtqueue_flush(VirtQueue *vq, unsigned int count); >> @@ -169,6 +188,9 @@ void virtio_update_irq(VirtIODevice *vdev); >> void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings >> *binding, >> void *opaque); >> >> +target_phys_addr_t virtio_mem_get_addr(VirtIODevice *vdev, int n); >> +int virtio_mem_get_size(VirtIODevice *vdev, int n); >> + >> /* Base devices. */ >> VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf); >> VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf); >> >> >> > >
On 02/24/2010 08:20 AM, Cam Macdonell wrote: > >> Virtio is really a DMA engine. One of the nice things about it's design is >> that you can do things like transparent bounce buffering if needed. Adding >> a mechanism like this breaks this abstract and turns virtio into something >> that's more than I think it should be. >> >> For guest shared memory, what makes sense to me is to make use of uio_pci >> within a guest to map bars in userspace. The device can have the first bar >> map a small MMIO region that is tied to a ioeventfd with KVM. With just >> qemu, it can be tied to a normal eventfd. You can use irqfd with KVM to tie >> an eventfd to the PCI irq. Likewise, you can use a normal eventfd to >> emulate that. I'd suggest using a /dev/shm file to act as the shared memory >> segment. You'll need a little magic within qemu_ram_alloc() to use this >> (maybe qemu_ram_alloc() should take a context parameter so this can be >> setup). >> > Part of my goal with this patch was to support systems without PCI. > My previous patch > Which? The only one I know of is s390 (and it would be a pity not to have shared memory for that).
On 02/24/2010 05:14 AM, Avi Kivity wrote: > On 02/24/2010 08:20 AM, Cam Macdonell wrote: >> >>> Virtio is really a DMA engine. One of the nice things about it's >>> design is >>> that you can do things like transparent bounce buffering if needed. >>> Adding >>> a mechanism like this breaks this abstract and turns virtio into >>> something >>> that's more than I think it should be. >>> >>> For guest shared memory, what makes sense to me is to make use of >>> uio_pci >>> within a guest to map bars in userspace. The device can have the >>> first bar >>> map a small MMIO region that is tied to a ioeventfd with KVM. With >>> just >>> qemu, it can be tied to a normal eventfd. You can use irqfd with >>> KVM to tie >>> an eventfd to the PCI irq. Likewise, you can use a normal eventfd to >>> emulate that. I'd suggest using a /dev/shm file to act as the >>> shared memory >>> segment. You'll need a little magic within qemu_ram_alloc() to use >>> this >>> (maybe qemu_ram_alloc() should take a context parameter so this can be >>> setup). >> Part of my goal with this patch was to support systems without PCI. >> My previous patch > > Which? The only one I know of is s390 (and it would be a pity not to > have shared memory for that). uio provides insulation against devices and busses. When you use uio in userspace, you don't have to have direct knowledge of the fact that a device is or isn't a PCI device. Regards, Anthony Liguori
On 02/24/2010 12:20 AM, Cam Macdonell wrote: > On Tue, Feb 23, 2010 at 2:05 PM, Anthony Liguori<anthony@codemonkey.ws> wrote: > >> Hi Cam, >> >> On 02/23/2010 02:52 PM, Cam Macdonell wrote: >> >>> Support for passing memory regions via VirtIO to remove need for PCI >>> support in the guest. >>> >>> Adds new vectors to VirtIO config space to pass memory regions similar >>> to how virtqueues are passed. >>> >>> Kernel patch is forthcoming that add device_ops to access the memory >>> regions. >>> >>> I have used this mechanism to implement my host shared memory >>> implementation >>> and modified Alex's Virtio FB to use it as well. >>> >>> >> Virtio is really a DMA engine. One of the nice things about it's design is >> that you can do things like transparent bounce buffering if needed. Adding >> a mechanism like this breaks this abstract and turns virtio into something >> that's more than I think it should be. >> >> For guest shared memory, what makes sense to me is to make use of uio_pci >> within a guest to map bars in userspace. The device can have the first bar >> map a small MMIO region that is tied to a ioeventfd with KVM. With just >> qemu, it can be tied to a normal eventfd. You can use irqfd with KVM to tie >> an eventfd to the PCI irq. Likewise, you can use a normal eventfd to >> emulate that. I'd suggest using a /dev/shm file to act as the shared memory >> segment. You'll need a little magic within qemu_ram_alloc() to use this >> (maybe qemu_ram_alloc() should take a context parameter so this can be >> setup). >> > Part of my goal with this patch was to support systems without PCI. > My previous patch > > http://www.mail-archive.com/kvm@vger.kernel.org/msg14284.html > > is a PCI implementation that uses eventfds (no ioeventfds yet) to > support interrupts between guests and a /dev/shm file to share between > guests. I'm not sure what you're suggesting the interrupts and > eventfds would be used for if the mapping is initiated by the guest in > the case. > > To be clear, this patch is not for shared memory per se, but adds > memory regions that have multiple uses and that doesn't rely on PCI to > access them. For example, I have modified Alex's virtio frame buffer > as a test of this patch of a use that doesn't share memory between > guests. But my interest particularly is to share memory using VirtIO. > Shared memory doesn't fit into the way virtio is modelled. virtio is very device centric and it's generally impossible to have cache coherent shared memory with most types of devices (certainly with all PCI devices). > In the response to my initial PCI implementation, you had suggested > using VirtIO, I was wrong in making that suggestion :-) Technically, I suggested that you send a shared memory buffer over the ring, not add a shared memory pool to virtio. The problem with this suggest is that it again breaks the DMA model. >> You'll need to setup the backend with the name of the shared memory segment >> and the two eventfds. Also, a user needs to specify a pci vendor/device id. >> It's probably worth having some kind of identifier tag in the PCI device's >> first MMIO region. >> >> I think there's a couple important characteristics of an implementation like >> this. By using a uio_pci, you can implement shared memory in userspace but >> you can also implement a proper kernel driver if you so desired for a >> specific device. >> > Agreed, you had suggested uio_pci for my PCI driver and I'd be happy > to work on that if we can agree on what the underlying device looks > like whether it should be VirtIO or just PCI. > I feel pretty strongly that we shouldn't add shared memory to virtio. I'm pretty sure Christian would too because it eliminates the possibility of implementing a copy-based transport for virtio (virtio-over-ethernet). With respect to making a generic shared memory mechanism that works for non-PCI systems, I'm somewhat indifferent. We could create the virtio-equivalent of a shared memory mechanism but IMHO, shared memory is an imperfect mechanism and building that level of infrastructure around it isn't worthwhile. Guest shared memory means you're implement a device driver outside of qemu or potentially in another guest. It's interesting for a few one-off circumstances but in general, it means things like live migration are extremely difficult. I still think the right thing to do for virtio-framebuffer is to copy the data into a shadow framebuffer and send rectangular updates over virtio. I know Alex disagrees, but since you have to do this in the backend anyway, I don't see the point in having qemu do this work instead of having the guest do the work. Regards, Anthony Liguori > Thanks for the feedback, > Cam > > >> Regards, >> >> Anthony Liguori >> >> >>> Comments are welcome, >>> >>> Thanks, >>> Cam >>> --- >>> hw/virtio-pci.c | 61 >>> +++++++++++++++++++++++++++++++++++++++++++++++++++---- >>> hw/virtio.c | 38 ++++++++++++++++++++++++++++++++++ >>> hw/virtio.h | 22 ++++++++++++++++++++ >>> 3 files changed, 117 insertions(+), 4 deletions(-) >>> >>> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c >>> index f3373ae..e71bf64 100644 >>> --- a/hw/virtio-pci.c >>> +++ b/hw/virtio-pci.c >>> @@ -55,13 +55,13 @@ >>> >>> /* MSI-X registers: only enabled if MSI-X is enabled. */ >>> /* A 16-bit vector for configuration changes. */ >>> -#define VIRTIO_MSI_CONFIG_VECTOR 20 >>> +#define VIRTIO_MSI_CONFIG_VECTOR 32 >>> /* A 16-bit vector for selected queue notifications. */ >>> -#define VIRTIO_MSI_QUEUE_VECTOR 22 >>> +#define VIRTIO_MSI_QUEUE_VECTOR 34 >>> >>> /* Config space size */ >>> -#define VIRTIO_PCI_CONFIG_NOMSI 20 >>> -#define VIRTIO_PCI_CONFIG_MSI 24 >>> +#define VIRTIO_PCI_CONFIG_NOMSI 32 >>> +#define VIRTIO_PCI_CONFIG_MSI 36 >>> #define VIRTIO_PCI_REGION_SIZE(dev) (msix_present(dev) ? \ >>> VIRTIO_PCI_CONFIG_MSI : \ >>> VIRTIO_PCI_CONFIG_NOMSI) >>> @@ -72,6 +72,12 @@ >>> VIRTIO_PCI_CONFIG_MSI : \ >>> VIRTIO_PCI_CONFIG_NOMSI) >>> >>> +#define VIRTIO_PCI_MEM_SEL 20 >>> + >>> +#define VIRTIO_PCI_MEM_SIZE 24 >>> + >>> +#define VIRTIO_PCI_MEM_PFN 28 >>> + >>> /* Virtio ABI version, if we increment this, we break the guest driver. >>> */ >>> #define VIRTIO_PCI_ABI_VERSION 0 >>> >>> @@ -227,6 +233,11 @@ static void virtio_ioport_write(void *opaque, >>> uint32_t addr, uint32_t val) >>> val = VIRTIO_NO_VECTOR; >>> virtio_queue_set_vector(vdev, vdev->queue_sel, val); >>> break; >>> + case VIRTIO_PCI_MEM_SEL: >>> + fprintf(stderr, "%s: VIRTIO_PCI_MEM_SEL %d\n", __func__, val); >>> + if (val< VIRTIO_PCI_MEM_MAX) >>> + vdev->mem_sel = val; >>> + break; >>> default: >>> fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n", >>> __func__, addr, val); >>> @@ -271,6 +282,14 @@ static uint32_t virtio_ioport_read(VirtIOPCIProxy >>> *proxy, uint32_t addr) >>> case VIRTIO_MSI_QUEUE_VECTOR: >>> ret = virtio_queue_vector(vdev, vdev->queue_sel); >>> break; >>> + case VIRTIO_PCI_MEM_SIZE: >>> + ret = virtio_mem_get_size(vdev, vdev->mem_sel); >>> + break; >>> + case VIRTIO_PCI_MEM_PFN: >>> + ret = virtio_mem_get_addr(vdev, vdev->mem_sel) >>> +>> VIRTIO_PCI_QUEUE_ADDR_SHIFT; >>> + break; >>> + >>> default: >>> break; >>> } >>> @@ -370,6 +389,30 @@ static void virtio_map(PCIDevice *pci_dev, int >>> region_num, >>> vdev->get_config(vdev, vdev->config); >>> } >>> >>> + >>> +static void virtio_setup_mem(PCIDevice *d, int region_num, >>> + pcibus_t addr, pcibus_t size, int type) >>> +{ >>> + VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, d); >>> + VirtIODevice *vdev; >>> + >>> + printf("XXX setmem to %#x - %#x\n", (uint32_t)addr, (uint32_t)(addr + >>> size)); >>> + vdev = proxy->vdev; >>> + >>> + cpu_register_physical_memory(addr, size, IO_MEM_UNASSIGNED); >>> + cpu_register_physical_memory(addr, size, >>> vdev->vmem[region_num-2].ram_addr | IO_MEM_RAM); >>> + >>> + /* store the address of the region and get a pointer for it >>> + * that can access the mem region */ >>> + vdev->vmem[region_num-2].guest_phys_addr = (target_phys_addr_t)addr; >>> + >>> + if (vdev->vmem[region_num-2].on_map) { >>> + vdev->vmem[region_num-2].on_map(vdev, addr, size); >>> + } >>> + return; >>> + >>> +} >>> + >>> static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, >>> uint32_t val, int len) >>> { >>> @@ -406,6 +449,7 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, >>> VirtIODevice *vdev, >>> { >>> uint8_t *config; >>> uint32_t size; >>> + int i; >>> >>> proxy->vdev = vdev; >>> >>> @@ -426,6 +470,15 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, >>> VirtIODevice *vdev, >>> >>> config[0x3d] = 1; >>> >>> + // only BARs that were added with virtio_add_memory will be allocated >>> here >>> + for (i = 0; i< VIRTIO_PCI_QUEUE_MAX; i++) { >>> + if (vdev->vmem[i].size> 0) { >>> + pci_register_bar(&proxy->pci_dev, 2 + i, vdev->vmem[i].size, >>> + PCI_BASE_ADDRESS_SPACE_MEMORY, virtio_setup_mem); >>> + } >>> + } >>> + >>> + >>> if (vdev->nvectors&& !msix_init(&proxy->pci_dev, vdev->nvectors, 1, >>> 0)) { >>> pci_register_bar(&proxy->pci_dev, 1, >>> msix_bar_size(&proxy->pci_dev), >>> diff --git a/hw/virtio.c b/hw/virtio.c >>> index 7c020a3..1f7924e 100644 >>> --- a/hw/virtio.c >>> +++ b/hw/virtio.c >>> @@ -549,6 +549,16 @@ target_phys_addr_t virtio_queue_get_addr(VirtIODevice >>> *vdev, int n) >>> return vdev->vq[n].pa; >>> } >>> >>> +target_phys_addr_t virtio_mem_get_addr(VirtIODevice *vdev, int n) >>> +{ >>> + return vdev->vmem[n].guest_phys_addr; >>> +} >>> + >>> +int virtio_mem_get_size(VirtIODevice *vdev, int n) >>> +{ >>> + return vdev->vmem[n].size; >>> +} >>> + >>> int virtio_queue_get_num(VirtIODevice *vdev, int n) >>> { >>> return vdev->vq[n].vring.num; >>> @@ -592,6 +602,33 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int >>> queue_size, >>> return&vdev->vq[i]; >>> } >>> >>> +VirtMem *virtio_add_memory(VirtIODevice *vdev, int mem_size, >>> + void (*handle_access)(VirtIODevice *, >>> + target_phys_addr_t, uint32_t)) >>> +{ >>> + int i; >>> + >>> + /* trace what virtio_fb_setmem does */ >>> + >>> + for (i = 0; i< VIRTIO_PCI_MEM_MAX; i++) { >>> + if (vdev->vmem[i].size == 0) { >>> + /* XXX: check that memory is a power of 2 */ >>> + vdev->vmem[i].size = mem_size * 1024 * 1024; /* presume size >>> is in MBs*/ >>> + vdev->vmem[i].ram_addr = qemu_ram_alloc(vdev->vmem[i].size); >>> + vdev->vmem[i].host_ptr = >>> qemu_get_ram_ptr(vdev->vmem[i].ram_addr); >>> + vdev->vmem[i].on_map = handle_access; >>> + break; >>> + } >>> + } >>> + >>> + return&vdev->vmem[i]; >>> +} >>> + >>> +void * virtio_get_memory_ptr(VirtMem * vmem) >>> +{ >>> + return vmem->host_ptr; >>> +} >>> + >>> void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) >>> { >>> /* Always notify when queue is empty (when feature acknowledge) */ >>> @@ -714,6 +751,7 @@ VirtIODevice *virtio_common_init(const char *name, >>> uint16_t device_id, >>> vdev->queue_sel = 0; >>> vdev->config_vector = VIRTIO_NO_VECTOR; >>> vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); >>> + vdev->vmem = qemu_mallocz(sizeof(VirtMem) * VIRTIO_PCI_MEM_MAX); >>> for(i = 0; i< VIRTIO_PCI_QUEUE_MAX; i++) >>> vdev->vq[i].vector = VIRTIO_NO_VECTOR; >>> >>> diff --git a/hw/virtio.h b/hw/virtio.h >>> index 3baa2a3..02beb27 100644 >>> --- a/hw/virtio.h >>> +++ b/hw/virtio.h >>> @@ -82,6 +82,15 @@ typedef struct VirtQueueElement >>> struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; >>> } VirtQueueElement; >>> >>> +typedef struct VirtMem >>> +{ >>> + target_phys_addr_t guest_phys_addr; >>> + ram_addr_t ram_addr; >>> + int size; >>> + uint8_t * host_ptr; >>> + void (*on_map)(VirtIODevice *vdev, target_phys_addr_t, uint32_t); >>> +} VirtMem; >>> + >>> typedef struct { >>> void (*notify)(void * opaque, uint16_t vector); >>> void (*save_config)(void * opaque, QEMUFile *f); >>> @@ -93,6 +102,8 @@ typedef struct { >>> >>> #define VIRTIO_PCI_QUEUE_MAX 64 >>> >>> +#define VIRTIO_PCI_MEM_MAX 16 >>> + >>> #define VIRTIO_NO_VECTOR 0xffff >>> >>> struct VirtIODevice >>> @@ -101,6 +112,7 @@ struct VirtIODevice >>> uint8_t status; >>> uint8_t isr; >>> uint16_t queue_sel; >>> + uint16_t mem_sel; >>> uint32_t guest_features; >>> size_t config_len; >>> void *config; >>> @@ -113,6 +125,7 @@ struct VirtIODevice >>> void (*set_config)(VirtIODevice *vdev, const uint8_t *config); >>> void (*reset)(VirtIODevice *vdev); >>> VirtQueue *vq; >>> + VirtMem * vmem; >>> const VirtIOBindings *binding; >>> void *binding_opaque; >>> uint16_t device_id; >>> @@ -122,6 +135,12 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int >>> queue_size, >>> void (*handle_output)(VirtIODevice *, >>> VirtQueue *)); >>> >>> +void * virtio_get_memory_ptr(VirtMem * vmem); >>> +VirtMem *virtio_add_memory(VirtIODevice *vdev, int mem_size, >>> + void (*handle_access)(VirtIODevice *, >>> + target_phys_addr_t, >>> + uint32_t)); >>> + >>> void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, >>> unsigned int len); >>> void virtqueue_flush(VirtQueue *vq, unsigned int count); >>> @@ -169,6 +188,9 @@ void virtio_update_irq(VirtIODevice *vdev); >>> void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings >>> *binding, >>> void *opaque); >>> >>> +target_phys_addr_t virtio_mem_get_addr(VirtIODevice *vdev, int n); >>> +int virtio_mem_get_size(VirtIODevice *vdev, int n); >>> + >>> /* Base devices. */ >>> VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf); >>> VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf); >>> >>> >>> >>> >> >>
On 02/24/2010 09:13 AM, Anthony Liguori wrote: >> to work on that if we can agree on what the underlying device looks >> like whether it should be VirtIO or just PCI. > Agreed, you had suggested uio_pci for my PCI driver and I'd be happy > > I feel pretty strongly that we shouldn't add shared memory to virtio. > I'm pretty sure Christian would too because it eliminates the > possibility of implementing a copy-based transport for virtio > (virtio-over-ethernet). Actually, ignore my crazy justification based on virtio-over-ethernet :-) Certain hypervisors (like Xen and PHYP) have very limited/non-existent support for shared memory. In the case of Xen, there's a very small pool that can be used for DMA buffers. In the case of PHYP, you have hardware accelerated copying for all I/O. Adding shared memory to virtio means that virtio could not be supported (or at least, all virtio devices) on these hypervisors. Regards, Anthony Liguori
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index f3373ae..e71bf64 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -55,13 +55,13 @@ /* MSI-X registers: only enabled if MSI-X is enabled. */ /* A 16-bit vector for configuration changes. */ -#define VIRTIO_MSI_CONFIG_VECTOR 20 +#define VIRTIO_MSI_CONFIG_VECTOR 32 /* A 16-bit vector for selected queue notifications. */ -#define VIRTIO_MSI_QUEUE_VECTOR 22 +#define VIRTIO_MSI_QUEUE_VECTOR 34 /* Config space size */ -#define VIRTIO_PCI_CONFIG_NOMSI 20 -#define VIRTIO_PCI_CONFIG_MSI 24 +#define VIRTIO_PCI_CONFIG_NOMSI 32 +#define VIRTIO_PCI_CONFIG_MSI 36 #define VIRTIO_PCI_REGION_SIZE(dev) (msix_present(dev) ? \ VIRTIO_PCI_CONFIG_MSI : \ VIRTIO_PCI_CONFIG_NOMSI) @@ -72,6 +72,12 @@ VIRTIO_PCI_CONFIG_MSI : \ VIRTIO_PCI_CONFIG_NOMSI) +#define VIRTIO_PCI_MEM_SEL 20 + +#define VIRTIO_PCI_MEM_SIZE 24 + +#define VIRTIO_PCI_MEM_PFN 28 + /* Virtio ABI version, if we increment this, we break the guest driver. */ #define VIRTIO_PCI_ABI_VERSION 0 @@ -227,6 +233,11 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) val = VIRTIO_NO_VECTOR; virtio_queue_set_vector(vdev, vdev->queue_sel, val); break; + case VIRTIO_PCI_MEM_SEL: + fprintf(stderr, "%s: VIRTIO_PCI_MEM_SEL %d\n", __func__, val); + if (val < VIRTIO_PCI_MEM_MAX) + vdev->mem_sel = val; + break; default: fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n", __func__, addr, val); @@ -271,6 +282,14 @@ static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr) case VIRTIO_MSI_QUEUE_VECTOR: ret = virtio_queue_vector(vdev, vdev->queue_sel); break; + case VIRTIO_PCI_MEM_SIZE: + ret = virtio_mem_get_size(vdev, vdev->mem_sel); + break; + case VIRTIO_PCI_MEM_PFN: + ret = virtio_mem_get_addr(vdev, vdev->mem_sel) + >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + break; + default: break; } @@ -370,6 +389,30 @@ static void virtio_map(PCIDevice *pci_dev, int region_num, vdev->get_config(vdev, vdev->config); } + +static void virtio_setup_mem(PCIDevice *d, int region_num, + pcibus_t addr, pcibus_t size, int type) +{ + VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, d); + VirtIODevice *vdev; + + printf("XXX setmem to %#x - %#x\n", (uint32_t)addr, (uint32_t)(addr + size)); + vdev = proxy->vdev; + + cpu_register_physical_memory(addr, size, IO_MEM_UNASSIGNED); + cpu_register_physical_memory(addr, size, vdev->vmem[region_num-2].ram_addr | IO_MEM_RAM); + + /* store the address of the region and get a pointer for it + * that can access the mem region */ + vdev->vmem[region_num-2].guest_phys_addr = (target_phys_addr_t)addr; + + if (vdev->vmem[region_num-2].on_map) { + vdev->vmem[region_num-2].on_map(vdev, addr, size); + } + return; + +} + static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t val, int len) { @@ -406,6 +449,7 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev, { uint8_t *config; uint32_t size; + int i; proxy->vdev = vdev; @@ -426,6 +470,15 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev, config[0x3d] = 1; + // only BARs that were added with virtio_add_memory will be allocated here + for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { + if (vdev->vmem[i].size > 0) { + pci_register_bar(&proxy->pci_dev, 2 + i, vdev->vmem[i].size, + PCI_BASE_ADDRESS_SPACE_MEMORY, virtio_setup_mem); + } + } + + if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) { pci_register_bar(&proxy->pci_dev, 1, msix_bar_size(&proxy->pci_dev), diff --git a/hw/virtio.c b/hw/virtio.c index 7c020a3..1f7924e 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -549,6 +549,16 @@ target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n) return vdev->vq[n].pa; } +target_phys_addr_t virtio_mem_get_addr(VirtIODevice *vdev, int n) +{ + return vdev->vmem[n].guest_phys_addr; +} + +int virtio_mem_get_size(VirtIODevice *vdev, int n) +{ + return vdev->vmem[n].size; +} + int virtio_queue_get_num(VirtIODevice *vdev, int n) { return vdev->vq[n].vring.num; @@ -592,6 +602,33 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, return &vdev->vq[i]; } +VirtMem *virtio_add_memory(VirtIODevice *vdev, int mem_size, + void (*handle_access)(VirtIODevice *, + target_phys_addr_t, uint32_t)) +{ + int i; + + /* trace what virtio_fb_setmem does */ + + for (i = 0; i < VIRTIO_PCI_MEM_MAX; i++) { + if (vdev->vmem[i].size == 0) { + /* XXX: check that memory is a power of 2 */ + vdev->vmem[i].size = mem_size * 1024 * 1024; /* presume size is in MBs*/ + vdev->vmem[i].ram_addr = qemu_ram_alloc(vdev->vmem[i].size); + vdev->vmem[i].host_ptr = qemu_get_ram_ptr(vdev->vmem[i].ram_addr); + vdev->vmem[i].on_map = handle_access; + break; + } + } + + return &vdev->vmem[i]; +} + +void * virtio_get_memory_ptr(VirtMem * vmem) +{ + return vmem->host_ptr; +} + void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) { /* Always notify when queue is empty (when feature acknowledge) */ @@ -714,6 +751,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id, vdev->queue_sel = 0; vdev->config_vector = VIRTIO_NO_VECTOR; vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); + vdev->vmem = qemu_mallocz(sizeof(VirtMem) * VIRTIO_PCI_MEM_MAX); for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) vdev->vq[i].vector = VIRTIO_NO_VECTOR; diff --git a/hw/virtio.h b/hw/virtio.h index 3baa2a3..02beb27 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -82,6 +82,15 @@ typedef struct VirtQueueElement struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; } VirtQueueElement; +typedef struct VirtMem +{ + target_phys_addr_t guest_phys_addr; + ram_addr_t ram_addr; + int size; + uint8_t * host_ptr; + void (*on_map)(VirtIODevice *vdev, target_phys_addr_t, uint32_t); +} VirtMem; + typedef struct { void (*notify)(void * opaque, uint16_t vector); void (*save_config)(void * opaque, QEMUFile *f); @@ -93,6 +102,8 @@ typedef struct { #define VIRTIO_PCI_QUEUE_MAX 64 +#define VIRTIO_PCI_MEM_MAX 16 + #define VIRTIO_NO_VECTOR 0xffff struct VirtIODevice @@ -101,6 +112,7 @@ struct VirtIODevice uint8_t status; uint8_t isr; uint16_t queue_sel; + uint16_t mem_sel; uint32_t guest_features; size_t config_len; void *config; @@ -113,6 +125,7 @@ struct VirtIODevice void (*set_config)(VirtIODevice *vdev, const uint8_t *config); void (*reset)(VirtIODevice *vdev); VirtQueue *vq; + VirtMem * vmem; const VirtIOBindings *binding; void *binding_opaque; uint16_t device_id; @@ -122,6 +135,12 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, void (*handle_output)(VirtIODevice *, VirtQueue *)); +void * virtio_get_memory_ptr(VirtMem * vmem); +VirtMem *virtio_add_memory(VirtIODevice *vdev, int mem_size, + void (*handle_access)(VirtIODevice *, + target_phys_addr_t, + uint32_t)); + void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len); void virtqueue_flush(VirtQueue *vq, unsigned int count); @@ -169,6 +188,9 @@ void virtio_update_irq(VirtIODevice *vdev); void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding, void *opaque); +target_phys_addr_t virtio_mem_get_addr(VirtIODevice *vdev, int n); +int virtio_mem_get_size(VirtIODevice *vdev, int n); + /* Base devices. */ VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf); VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf);