diff mbox series

[RFC,v9,14/29] vfio: Introduce helpers to DMA map/unmap a RAM section

Message ID 20210411120912.15770-15-eric.auger@redhat.com
State New
Headers show
Series vSMMUv3/pSMMUv3 2 stage VFIO integration | expand

Commit Message

Eric Auger April 11, 2021, 12:08 p.m. UTC
Let's introduce two helpers that allow to DMA map/unmap a RAM
section. Those helpers will be called for nested stage setup in
another call site. Also the vfio_listener_region_add/del()
structure may be clearer.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

v8 -> v9
- rebase on top of
  1eb7f642750c ("vfio: Support host translation granule size")

v5 -> v6:
- add Error **
---
 hw/vfio/common.c     | 199 +++++++++++++++++++++++++------------------
 hw/vfio/trace-events |   4 +-
 2 files changed, 119 insertions(+), 84 deletions(-)

Comments

Kunkun Jiang April 27, 2021, 2:05 p.m. UTC | #1
Hi Eric,

On 2021/4/11 20:08, Eric Auger wrote:
> Let's introduce two helpers that allow to DMA map/unmap a RAM
> section. Those helpers will be called for nested stage setup in
> another call site. Also the vfio_listener_region_add/del()
> structure may be clearer.
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>
> ---
>
> v8 -> v9
> - rebase on top of
>    1eb7f642750c ("vfio: Support host translation granule size")
>
> v5 -> v6:
> - add Error **
> ---
>   hw/vfio/common.c     | 199 +++++++++++++++++++++++++------------------
>   hw/vfio/trace-events |   4 +-
>   2 files changed, 119 insertions(+), 84 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index a8f835328e..0cd7ef2139 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -662,13 +662,126 @@ hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end)
>       return NULL;
>   }
>   
> +static int vfio_dma_map_ram_section(VFIOContainer *container,
> +                                    MemoryRegionSection *section, Error **err)
> +{
> +    VFIOHostDMAWindow *hostwin;
> +    Int128 llend, llsize;
> +    hwaddr iova, end;
> +    void *vaddr;
> +    int ret;
> +
> +    assert(memory_region_is_ram(section->mr));
> +
> +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> +    end = int128_get64(int128_sub(llend, int128_one()));
> +
> +    vaddr = memory_region_get_ram_ptr(section->mr) +
> +            section->offset_within_region +
> +            (iova - section->offset_within_address_space);
> +
> +    hostwin = hostwin_from_range(container, iova, end);
> +    if (!hostwin) {
> +        error_setg(err, "Container %p can't map guest IOVA region"
> +                   " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
> +        return -EFAULT;
> +    }
> +
> +    trace_vfio_dma_map_ram(iova, end, vaddr);
> +
> +    llsize = int128_sub(llend, int128_make64(iova));
> +
> +    if (memory_region_is_ram_device(section->mr)) {
> +        hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> +
> +        if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
> +            trace_vfio_listener_region_add_no_dma_map(
> +                memory_region_name(section->mr),
> +                section->offset_within_address_space,
> +                int128_getlo(section->size),
> +                pgmask + 1);
> +            return 0;
> +        }
> +    }
> +
> +    ret = vfio_dma_map(container, iova, int128_get64(llsize),
> +                       vaddr, section->readonly);
> +    if (ret) {
> +        error_setg(err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> +                   "0x%"HWADDR_PRIx", %p) = %d (%m)",
> +                   container, iova, int128_get64(llsize), vaddr, ret);
> +        if (memory_region_is_ram_device(section->mr)) {
> +            /* Allow unexpected mappings not to be fatal for RAM devices */
> +            error_report_err(*err);
> +            return 0;
> +        }
> +        return ret;
> +    }
> +    return 0;
> +}
> +
> +static void vfio_dma_unmap_ram_section(VFIOContainer *container,
> +                                       MemoryRegionSection *section)
> +{
> +    Int128 llend, llsize;
> +    hwaddr iova, end;
> +    bool try_unmap = true;
> +    int ret;
> +
> +    iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
> +
> +    if (int128_ge(int128_make64(iova), llend)) {
> +        return;
> +    }
> +    end = int128_get64(int128_sub(llend, int128_one()));
> +
> +    llsize = int128_sub(llend, int128_make64(iova));
> +
> +    trace_vfio_dma_unmap_ram(iova, end);
> +
> +    if (memory_region_is_ram_device(section->mr)) {
> +        hwaddr pgmask;
> +        VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
> +
> +        assert(hostwin); /* or region_add() would have failed */
> +
> +        pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> +        try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
> +    }
> +
> +    if (try_unmap) {
> +        if (int128_eq(llsize, int128_2_64())) {
> +            /* The unmap ioctl doesn't accept a full 64-bit span. */
> +            llsize = int128_rshift(llsize, 1);
> +            ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> +            if (ret) {
> +                error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> +                             "0x%"HWADDR_PRIx") = %d (%m)",
> +                             container, iova, int128_get64(llsize), ret);
> +            }
> +            iova += int128_get64(llsize);
> +        }
> +        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> +        if (ret) {
> +            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> +                         "0x%"HWADDR_PRIx") = %d (%m)",
> +                         container, iova, int128_get64(llsize), ret);
> +        }
> +    }
> +}
> +
>   static void vfio_listener_region_add(MemoryListener *listener,
>                                        MemoryRegionSection *section)
>   {
>       VFIOContainer *container = container_of(listener, VFIOContainer, listener);
>       hwaddr iova, end;
> -    Int128 llend, llsize;
> -    void *vaddr;
> +    Int128 llend;
>       int ret;
>       VFIOHostDMAWindow *hostwin;
>       Error *err = NULL;
> @@ -814,39 +927,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
>       }
>   
>       /* Here we assume that memory_region_is_ram(section->mr)==true */
> -
> -    vaddr = memory_region_get_ram_ptr(section->mr) +
> -            section->offset_within_region +
> -            (iova - section->offset_within_address_space);
> -
> -    trace_vfio_listener_region_add_ram(iova, end, vaddr);
> -
> -    llsize = int128_sub(llend, int128_make64(iova));
> -
> -    if (memory_region_is_ram_device(section->mr)) {
> -        hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> -
> -        if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
> -            trace_vfio_listener_region_add_no_dma_map(
> -                memory_region_name(section->mr),
> -                section->offset_within_address_space,
> -                int128_getlo(section->size),
> -                pgmask + 1);
> -            return;
> -        }
> -    }
> -
> -    ret = vfio_dma_map(container, iova, int128_get64(llsize),
> -                       vaddr, section->readonly);
> -    if (ret) {
> -        error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> -                   "0x%"HWADDR_PRIx", %p) = %d (%m)",
> -                   container, iova, int128_get64(llsize), vaddr, ret);
> -        if (memory_region_is_ram_device(section->mr)) {
> -            /* Allow unexpected mappings not to be fatal for RAM devices */
> -            error_report_err(err);
> -            return;
> -        }
> +    if (vfio_dma_map_ram_section(container, section, &err)) {
>           goto fail;
>       }
>   
> @@ -880,10 +961,6 @@ static void vfio_listener_region_del(MemoryListener *listener,
>                                        MemoryRegionSection *section)
>   {
>       VFIOContainer *container = container_of(listener, VFIOContainer, listener);
> -    hwaddr iova, end;
> -    Int128 llend, llsize;
> -    int ret;
> -    bool try_unmap = true;
>   
>       if (vfio_listener_skipped_section(section)) {
>           trace_vfio_listener_region_del_skip(
> @@ -923,49 +1000,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
>            */
>       }
There are some codes of vfio_listener_region_del() that just doesn't 
show up.
I post it below.

> if (memory_region_is_iommu(section->mr)) {
>         VFIOGuestIOMMU *giommu;
>
>         QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
>             if (MEMORY_REGION(giommu->iommu) == section->mr &&
>                 giommu->n.start == section->offset_within_region) {
> memory_region_unregister_iommu_notifier(section->mr,
> &giommu->n);
>                 QLIST_REMOVE(giommu, giommu_next);
>                 g_free(giommu);
>                 break;
>             }
>         }
>
>         /*
>          * FIXME: We assume the one big unmap below is adequate to
>          * remove any individual page mappings in the IOMMU which
>          * might have been copied into VFIO. This works for a page table
>          * based IOMMU where a big unmap flattens a large range of 
> IO-PTEs.
>          * That may not be true for all IOMMU types.
>          */
>     }
I think we need a check here. If it is nested mode, just return after
g_free(giommu). Because in nested mode, stage 2 (gpa->hpa) and the
stage 1 (giova->gpa) are set separately.

When hot delete a pci device,  we are going to call
vfio_listener_region_del() and vfio_prereg_listener_region_del().
So it's not appropriate to unmap RAM section in
vfio_listener_region_del(). The RAM section will be unmap in
vfio_prereg_listener_region_del().

Thanks,
Kunkun Jiang
>   
> -    iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
> -    llend = int128_make64(section->offset_within_address_space);
> -    llend = int128_add(llend, section->size);
> -    llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
> -
> -    if (int128_ge(int128_make64(iova), llend)) {
> -        return;
> -    }
> -    end = int128_get64(int128_sub(llend, int128_one()));
> -
> -    llsize = int128_sub(llend, int128_make64(iova));
> -
> -    trace_vfio_listener_region_del(iova, end);
> -
> -    if (memory_region_is_ram_device(section->mr)) {
> -        hwaddr pgmask;
> -        VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
> -
> -        assert(hostwin); /* or region_add() would have failed */
> -
> -        pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> -        try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
> -    }
> -
> -    if (try_unmap) {
> -        if (int128_eq(llsize, int128_2_64())) {
> -            /* The unmap ioctl doesn't accept a full 64-bit span. */
> -            llsize = int128_rshift(llsize, 1);
> -            ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> -            if (ret) {
> -                error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> -                             "0x%"HWADDR_PRIx") = %d (%m)",
> -                             container, iova, int128_get64(llsize), ret);
> -            }
> -            iova += int128_get64(llsize);
> -        }
> -        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> -        if (ret) {
> -            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> -                         "0x%"HWADDR_PRIx") = %d (%m)",
> -                         container, iova, int128_get64(llsize), ret);
> -        }
> -    }
> +    vfio_dma_unmap_ram_section(container, section);
>   
>       memory_region_unref(section->mr);
>   
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 2a41326c0f..936d29d150 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -99,10 +99,10 @@ vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "i
>   vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add 0x%"PRIx64" - 0x%"PRIx64
>   vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
>   vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64
> -vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
> +vfio_dma_map_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
>   vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
>   vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64
> -vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
> +vfio_dma_unmap_ram(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
>   vfio_disconnect_container(int fd) "close container->fd=%d"
>   vfio_connect_existing_container(int groupid, int container_fd) "group=%d existing container fd=%d"
>   vfio_connect_new_container(int groupid, int container_fd) "group=%d new container fd=%d"
Kunkun Jiang Sept. 3, 2021, 8:22 a.m. UTC | #2
Hi Eric,

On 2021/4/11 20:08, Eric Auger wrote:
> Let's introduce two helpers that allow to DMA map/unmap a RAM
> section. Those helpers will be called for nested stage setup in
> another call site. Also the vfio_listener_region_add/del()
> structure may be clearer.
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>
> ---
>
> v8 -> v9
> - rebase on top of
>    1eb7f642750c ("vfio: Support host translation granule size")
>
> v5 -> v6:
> - add Error **
> ---
>   hw/vfio/common.c     | 199 +++++++++++++++++++++++++------------------
>   hw/vfio/trace-events |   4 +-
>   2 files changed, 119 insertions(+), 84 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index a8f835328e..0cd7ef2139 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -662,13 +662,126 @@ hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end)
>       return NULL;
>   }
>   
> +static int vfio_dma_map_ram_section(VFIOContainer *container,
> +                                    MemoryRegionSection *section, Error **err)
> +{
> +    VFIOHostDMAWindow *hostwin;
> +    Int128 llend, llsize;
> +    hwaddr iova, end;
> +    void *vaddr;
> +    int ret;
> +
> +    assert(memory_region_is_ram(section->mr));
> +
> +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
Is there any special meaning for using TRAGET_PAGE_ALIGN here?
REAL_HOST_PAGE_ALIGN is used in vfio_listener_region_add.

And I think a check should be added here if using REAL_HOST_PAGE_ALIGN.
> if (int128_ge(int128_make64(iova), llend)) {
>      return;
> }

It will cause an error log or 'assert(r ==a )' of int128_get64 by calling
vfio_prereg_listener_region_add in some scenarios. Some devices' BAR
may map MSI-X structures and others in one host page.

By the way, is this set of patch to be updated after "/dev/iommu" is
sent out?

Thanks,
Kunkun Jiang

> +    end = int128_get64(int128_sub(llend, int128_one()));
> +
> +    vaddr = memory_region_get_ram_ptr(section->mr) +
> +            section->offset_within_region +
> +            (iova - section->offset_within_address_space);
> +
> +    hostwin = hostwin_from_range(container, iova, end);
> +    if (!hostwin) {
> +        error_setg(err, "Container %p can't map guest IOVA region"
> +                   " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
> +        return -EFAULT;
> +    }
> +
> +    trace_vfio_dma_map_ram(iova, end, vaddr);
> +
> +    llsize = int128_sub(llend, int128_make64(iova));
> +
> +    if (memory_region_is_ram_device(section->mr)) {
> +        hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> +
> +        if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
> +            trace_vfio_listener_region_add_no_dma_map(
> +                memory_region_name(section->mr),
> +                section->offset_within_address_space,
> +                int128_getlo(section->size),
> +                pgmask + 1);
> +            return 0;
> +        }
> +    }
> +
> +    ret = vfio_dma_map(container, iova, int128_get64(llsize),
> +                       vaddr, section->readonly);
> +    if (ret) {
> +        error_setg(err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> +                   "0x%"HWADDR_PRIx", %p) = %d (%m)",
> +                   container, iova, int128_get64(llsize), vaddr, ret);
> +        if (memory_region_is_ram_device(section->mr)) {
> +            /* Allow unexpected mappings not to be fatal for RAM devices */
> +            error_report_err(*err);
> +            return 0;
> +        }
> +        return ret;
> +    }
> +    return 0;
> +}
> +
> +static void vfio_dma_unmap_ram_section(VFIOContainer *container,
> +                                       MemoryRegionSection *section)
> +{
> +    Int128 llend, llsize;
> +    hwaddr iova, end;
> +    bool try_unmap = true;
> +    int ret;
> +
> +    iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
> +
> +    if (int128_ge(int128_make64(iova), llend)) {
> +        return;
> +    }
> +    end = int128_get64(int128_sub(llend, int128_one()));
> +
> +    llsize = int128_sub(llend, int128_make64(iova));
> +
> +    trace_vfio_dma_unmap_ram(iova, end);
> +
> +    if (memory_region_is_ram_device(section->mr)) {
> +        hwaddr pgmask;
> +        VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
> +
> +        assert(hostwin); /* or region_add() would have failed */
> +
> +        pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> +        try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
> +    }
> +
> +    if (try_unmap) {
> +        if (int128_eq(llsize, int128_2_64())) {
> +            /* The unmap ioctl doesn't accept a full 64-bit span. */
> +            llsize = int128_rshift(llsize, 1);
> +            ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> +            if (ret) {
> +                error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> +                             "0x%"HWADDR_PRIx") = %d (%m)",
> +                             container, iova, int128_get64(llsize), ret);
> +            }
> +            iova += int128_get64(llsize);
> +        }
> +        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> +        if (ret) {
> +            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> +                         "0x%"HWADDR_PRIx") = %d (%m)",
> +                         container, iova, int128_get64(llsize), ret);
> +        }
> +    }
> +}
> +
>   static void vfio_listener_region_add(MemoryListener *listener,
>                                        MemoryRegionSection *section)
>   {
>       VFIOContainer *container = container_of(listener, VFIOContainer, listener);
>       hwaddr iova, end;
> -    Int128 llend, llsize;
> -    void *vaddr;
> +    Int128 llend;
>       int ret;
>       VFIOHostDMAWindow *hostwin;
>       Error *err = NULL;
> @@ -814,39 +927,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
>       }
>   
>       /* Here we assume that memory_region_is_ram(section->mr)==true */
> -
> -    vaddr = memory_region_get_ram_ptr(section->mr) +
> -            section->offset_within_region +
> -            (iova - section->offset_within_address_space);
> -
> -    trace_vfio_listener_region_add_ram(iova, end, vaddr);
> -
> -    llsize = int128_sub(llend, int128_make64(iova));
> -
> -    if (memory_region_is_ram_device(section->mr)) {
> -        hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> -
> -        if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
> -            trace_vfio_listener_region_add_no_dma_map(
> -                memory_region_name(section->mr),
> -                section->offset_within_address_space,
> -                int128_getlo(section->size),
> -                pgmask + 1);
> -            return;
> -        }
> -    }
> -
> -    ret = vfio_dma_map(container, iova, int128_get64(llsize),
> -                       vaddr, section->readonly);
> -    if (ret) {
> -        error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> -                   "0x%"HWADDR_PRIx", %p) = %d (%m)",
> -                   container, iova, int128_get64(llsize), vaddr, ret);
> -        if (memory_region_is_ram_device(section->mr)) {
> -            /* Allow unexpected mappings not to be fatal for RAM devices */
> -            error_report_err(err);
> -            return;
> -        }
> +    if (vfio_dma_map_ram_section(container, section, &err)) {
>           goto fail;
>       }
>   
> @@ -880,10 +961,6 @@ static void vfio_listener_region_del(MemoryListener *listener,
>                                        MemoryRegionSection *section)
>   {
>       VFIOContainer *container = container_of(listener, VFIOContainer, listener);
> -    hwaddr iova, end;
> -    Int128 llend, llsize;
> -    int ret;
> -    bool try_unmap = true;
>   
>       if (vfio_listener_skipped_section(section)) {
>           trace_vfio_listener_region_del_skip(
> @@ -923,49 +1000,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
>            */
>       }
>   
> -    iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
> -    llend = int128_make64(section->offset_within_address_space);
> -    llend = int128_add(llend, section->size);
> -    llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
> -
> -    if (int128_ge(int128_make64(iova), llend)) {
> -        return;
> -    }
> -    end = int128_get64(int128_sub(llend, int128_one()));
> -
> -    llsize = int128_sub(llend, int128_make64(iova));
> -
> -    trace_vfio_listener_region_del(iova, end);
> -
> -    if (memory_region_is_ram_device(section->mr)) {
> -        hwaddr pgmask;
> -        VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
> -
> -        assert(hostwin); /* or region_add() would have failed */
> -
> -        pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> -        try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
> -    }
> -
> -    if (try_unmap) {
> -        if (int128_eq(llsize, int128_2_64())) {
> -            /* The unmap ioctl doesn't accept a full 64-bit span. */
> -            llsize = int128_rshift(llsize, 1);
> -            ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> -            if (ret) {
> -                error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> -                             "0x%"HWADDR_PRIx") = %d (%m)",
> -                             container, iova, int128_get64(llsize), ret);
> -            }
> -            iova += int128_get64(llsize);
> -        }
> -        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> -        if (ret) {
> -            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> -                         "0x%"HWADDR_PRIx") = %d (%m)",
> -                         container, iova, int128_get64(llsize), ret);
> -        }
> -    }
> +    vfio_dma_unmap_ram_section(container, section);
>   
>       memory_region_unref(section->mr);
>   
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 2a41326c0f..936d29d150 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -99,10 +99,10 @@ vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "i
>   vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add 0x%"PRIx64" - 0x%"PRIx64
>   vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
>   vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64
> -vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
> +vfio_dma_map_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
>   vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
>   vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64
> -vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
> +vfio_dma_unmap_ram(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
>   vfio_disconnect_container(int fd) "close container->fd=%d"
>   vfio_connect_existing_container(int groupid, int container_fd) "group=%d existing container fd=%d"
>   vfio_connect_new_container(int groupid, int container_fd) "group=%d new container fd=%d"
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index a8f835328e..0cd7ef2139 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -662,13 +662,126 @@  hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end)
     return NULL;
 }
 
+static int vfio_dma_map_ram_section(VFIOContainer *container,
+                                    MemoryRegionSection *section, Error **err)
+{
+    VFIOHostDMAWindow *hostwin;
+    Int128 llend, llsize;
+    hwaddr iova, end;
+    void *vaddr;
+    int ret;
+
+    assert(memory_region_is_ram(section->mr));
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    llend = int128_make64(section->offset_within_address_space);
+    llend = int128_add(llend, section->size);
+    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+    end = int128_get64(int128_sub(llend, int128_one()));
+
+    vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+
+    hostwin = hostwin_from_range(container, iova, end);
+    if (!hostwin) {
+        error_setg(err, "Container %p can't map guest IOVA region"
+                   " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
+        return -EFAULT;
+    }
+
+    trace_vfio_dma_map_ram(iova, end, vaddr);
+
+    llsize = int128_sub(llend, int128_make64(iova));
+
+    if (memory_region_is_ram_device(section->mr)) {
+        hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
+
+        if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
+            trace_vfio_listener_region_add_no_dma_map(
+                memory_region_name(section->mr),
+                section->offset_within_address_space,
+                int128_getlo(section->size),
+                pgmask + 1);
+            return 0;
+        }
+    }
+
+    ret = vfio_dma_map(container, iova, int128_get64(llsize),
+                       vaddr, section->readonly);
+    if (ret) {
+        error_setg(err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
+                   "0x%"HWADDR_PRIx", %p) = %d (%m)",
+                   container, iova, int128_get64(llsize), vaddr, ret);
+        if (memory_region_is_ram_device(section->mr)) {
+            /* Allow unexpected mappings not to be fatal for RAM devices */
+            error_report_err(*err);
+            return 0;
+        }
+        return ret;
+    }
+    return 0;
+}
+
+static void vfio_dma_unmap_ram_section(VFIOContainer *container,
+                                       MemoryRegionSection *section)
+{
+    Int128 llend, llsize;
+    hwaddr iova, end;
+    bool try_unmap = true;
+    int ret;
+
+    iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
+    llend = int128_make64(section->offset_within_address_space);
+    llend = int128_add(llend, section->size);
+    llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
+
+    if (int128_ge(int128_make64(iova), llend)) {
+        return;
+    }
+    end = int128_get64(int128_sub(llend, int128_one()));
+
+    llsize = int128_sub(llend, int128_make64(iova));
+
+    trace_vfio_dma_unmap_ram(iova, end);
+
+    if (memory_region_is_ram_device(section->mr)) {
+        hwaddr pgmask;
+        VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
+
+        assert(hostwin); /* or region_add() would have failed */
+
+        pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
+        try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
+    }
+
+    if (try_unmap) {
+        if (int128_eq(llsize, int128_2_64())) {
+            /* The unmap ioctl doesn't accept a full 64-bit span. */
+            llsize = int128_rshift(llsize, 1);
+            ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
+            if (ret) {
+                error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+                             "0x%"HWADDR_PRIx") = %d (%m)",
+                             container, iova, int128_get64(llsize), ret);
+            }
+            iova += int128_get64(llsize);
+        }
+        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
+        if (ret) {
+            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+                         "0x%"HWADDR_PRIx") = %d (%m)",
+                         container, iova, int128_get64(llsize), ret);
+        }
+    }
+}
+
 static void vfio_listener_region_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
     hwaddr iova, end;
-    Int128 llend, llsize;
-    void *vaddr;
+    Int128 llend;
     int ret;
     VFIOHostDMAWindow *hostwin;
     Error *err = NULL;
@@ -814,39 +927,7 @@  static void vfio_listener_region_add(MemoryListener *listener,
     }
 
     /* Here we assume that memory_region_is_ram(section->mr)==true */
-
-    vaddr = memory_region_get_ram_ptr(section->mr) +
-            section->offset_within_region +
-            (iova - section->offset_within_address_space);
-
-    trace_vfio_listener_region_add_ram(iova, end, vaddr);
-
-    llsize = int128_sub(llend, int128_make64(iova));
-
-    if (memory_region_is_ram_device(section->mr)) {
-        hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
-
-        if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
-            trace_vfio_listener_region_add_no_dma_map(
-                memory_region_name(section->mr),
-                section->offset_within_address_space,
-                int128_getlo(section->size),
-                pgmask + 1);
-            return;
-        }
-    }
-
-    ret = vfio_dma_map(container, iova, int128_get64(llsize),
-                       vaddr, section->readonly);
-    if (ret) {
-        error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
-                   "0x%"HWADDR_PRIx", %p) = %d (%m)",
-                   container, iova, int128_get64(llsize), vaddr, ret);
-        if (memory_region_is_ram_device(section->mr)) {
-            /* Allow unexpected mappings not to be fatal for RAM devices */
-            error_report_err(err);
-            return;
-        }
+    if (vfio_dma_map_ram_section(container, section, &err)) {
         goto fail;
     }
 
@@ -880,10 +961,6 @@  static void vfio_listener_region_del(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
-    hwaddr iova, end;
-    Int128 llend, llsize;
-    int ret;
-    bool try_unmap = true;
 
     if (vfio_listener_skipped_section(section)) {
         trace_vfio_listener_region_del_skip(
@@ -923,49 +1000,7 @@  static void vfio_listener_region_del(MemoryListener *listener,
          */
     }
 
-    iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
-    llend = int128_make64(section->offset_within_address_space);
-    llend = int128_add(llend, section->size);
-    llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
-
-    if (int128_ge(int128_make64(iova), llend)) {
-        return;
-    }
-    end = int128_get64(int128_sub(llend, int128_one()));
-
-    llsize = int128_sub(llend, int128_make64(iova));
-
-    trace_vfio_listener_region_del(iova, end);
-
-    if (memory_region_is_ram_device(section->mr)) {
-        hwaddr pgmask;
-        VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
-
-        assert(hostwin); /* or region_add() would have failed */
-
-        pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
-        try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
-    }
-
-    if (try_unmap) {
-        if (int128_eq(llsize, int128_2_64())) {
-            /* The unmap ioctl doesn't accept a full 64-bit span. */
-            llsize = int128_rshift(llsize, 1);
-            ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
-            if (ret) {
-                error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
-                             "0x%"HWADDR_PRIx") = %d (%m)",
-                             container, iova, int128_get64(llsize), ret);
-            }
-            iova += int128_get64(llsize);
-        }
-        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
-        if (ret) {
-            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
-                         "0x%"HWADDR_PRIx") = %d (%m)",
-                         container, iova, int128_get64(llsize), ret);
-        }
-    }
+    vfio_dma_unmap_ram_section(container, section);
 
     memory_region_unref(section->mr);
 
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 2a41326c0f..936d29d150 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -99,10 +99,10 @@  vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "i
 vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add 0x%"PRIx64" - 0x%"PRIx64
 vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
 vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64
-vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
+vfio_dma_map_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
 vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
 vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64
-vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
+vfio_dma_unmap_ram(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
 vfio_disconnect_container(int fd) "close container->fd=%d"
 vfio_connect_existing_container(int groupid, int container_fd) "group=%d existing container fd=%d"
 vfio_connect_new_container(int groupid, int container_fd) "group=%d new container fd=%d"