diff mbox

[RFC] vfio: Add support for mmapping sub-page MMIO BARs

Message ID 1464101265-28080-1-git-send-email-xyjxie@linux.vnet.ibm.com
State New
Headers show

Commit Message

Yongji Xie May 24, 2016, 2:47 p.m. UTC
The kernel patch [1] now allows VFIO to mmap sub-page BARs.
This is the corresponding QEMU patch. With both patches
applied, we can pass sub-page BARs through to the guest, which
can help to improve I/O performance for some devices.

In this patch, we expand the MemoryRegions of these sub-page
MMIO BARs to PAGE_SIZE in vfio_pci_write_config(), so that
the BARs can be passed to the KVM_SET_USER_MEMORY_REGION ioctl
with a valid size. The expanded size is restored to the original
BAR size when the guest changes the base address of a sub-page
BAR so that it is no longer page aligned. We also set the priority
of these BARs' memory regions to zero, in case they overlap with
other BARs that share the same page with a sub-page BAR in the guest.

[1] http://www.spinics.net/lists/kvm/msg132382.html

Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com>
---
 hw/vfio/common.c |    3 +--
 hw/vfio/pci.c    |   69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 2 deletions(-)
diff mbox

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 88154a1..b898532 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -522,8 +522,7 @@  int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                               region, name, region->size);
 
         if (!vbasedev->no_mmap &&
-            region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
-            !(region->size & ~qemu_real_host_page_mask)) {
+            region->flags & VFIO_REGION_INFO_FLAG_MMAP) {
 
             region->nr_mmaps = 1;
             region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d091d8c..edf9c8d 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1057,6 +1057,58 @@  static const MemoryRegionOps vfio_vga_ops = {
 };
 
 /*
+ * Resize the memory regions of a sub-page (size < host page size) MMIO BAR
+ * after its guest address changes. A mapped, page-aligned BAR with a live
+ * mmap (i.e. it has an exclusive page in the host - assumed, not checked
+ * here) is grown to one host page and re-added at priority 0, in case it
+ * overlaps BARs sharing the same guest page. If the address is no longer
+ * page aligned, the regions are restored to the BAR's real size.
+ */
+static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
+{
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    MemoryRegion *mmap_mr;
+    MemoryRegion *mr;
+    PCIIORegion *r;
+    pcibus_t bar_addr;
+    /* Only BARs whose VFIO region has exactly one mmap area are handled. */
+    if (vdev->bars[bar].region.nr_mmaps != 1) {
+        return;
+    }
+    /* Nothing to resize while the BAR address is PCI_BAR_UNMAPPED. */
+    r = &pdev->io_regions[bar];
+    bar_addr = r->addr;
+    if (bar_addr == PCI_BAR_UNMAPPED) {
+        return;
+    }
+    /* Group the resize and re-add below into one memory transaction. */
+    memory_region_transaction_begin();
+    mr = vdev->bars[bar].region.mem;
+    mmap_mr = &vdev->bars[bar].region.mmaps[0].mem;
+    if (memory_region_size(mr) == qemu_real_host_page_size) {
+        if (bar_addr & ~qemu_real_host_page_mask) { /* now unaligned */
+            memory_region_set_size(mr, r->size); /* back to real BAR size */
+            memory_region_set_size(mmap_mr, r->size);
+        } else if (memory_region_is_mapped(mr)) { /* aligned: re-add at 0 */
+            memory_region_del_subregion(r->address_space, mr);
+            memory_region_add_subregion_overlap(r->address_space,
+                                                bar_addr, mr, 0);
+        }
+    } else { /* regions are not page sized at this point */
+        if (!(bar_addr & ~qemu_real_host_page_mask) &&
+            memory_region_is_mapped(mr) &&
+            vdev->bars[bar].region.mmaps[0].mmap) { /* mmap pointer set */
+            memory_region_del_subregion(r->address_space, mr);
+            memory_region_set_size(mr, qemu_real_host_page_size);
+            memory_region_set_size(mmap_mr, qemu_real_host_page_size);
+            memory_region_add_subregion_overlap(r->address_space,
+                                                bar_addr, mr, 0);
+        }
+    }
+    memory_region_transaction_commit();
+}
+
+/*
  * PCI config space
  */
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
@@ -1139,6 +1191,23 @@  void vfio_pci_write_config(PCIDevice *pdev,
         } else if (was_enabled && !is_enabled) {
             vfio_msix_disable(vdev);
         }
+    } else if (ranges_overlap(addr, len, PCI_BASE_ADDRESS_0, 24) ||
+        range_covers_byte(addr, len, PCI_COMMAND)) {
+        pcibus_t old_addr[PCI_NUM_REGIONS - 1];
+        int bar;
+
+        for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
+            old_addr[bar] = pdev->io_regions[bar].addr;
+        }
+
+        pci_default_write_config(pdev, addr, val, len);
+
+        for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
+            if (old_addr[bar] != pdev->io_regions[bar].addr &&
+                pdev->io_regions[bar].size > 0 &&
+                pdev->io_regions[bar].size < qemu_real_host_page_size)
+                vfio_sub_page_bar_update_mapping(pdev, bar);
+        }
     } else {
         /* Write everything to QEMU to keep emulated bits correct */
         pci_default_write_config(pdev, addr, val, len);