
[v2,03/14] powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE() to save TCEs

Message ID 20200911170738.82818-4-leobras.c@gmail.com (mailing list archive)
State Superseded, archived
Series DDW Indirect Mapping

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (4b552a4cbf286ff9dcdab19153f3c1c7d1680fab)
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 42 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Leonardo Brás Sept. 11, 2020, 5:07 p.m. UTC
Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org, 

Currently both iommu_alloc_coherent() and iommu_free_coherent() align the
desired allocation size to PAGE_SIZE, and get system pages and IOMMU
mappings (TCEs) for that value.

When IOMMU_PAGE_SIZE < PAGE_SIZE, this behavior may cause unnecessary
TCEs to be created for mapping the whole system page.

Example:
- PAGE_SIZE = 64k, IOMMU_PAGE_SIZE() = 4k
- iommu_alloc_coherent() is called for 128 bytes
- 1 system page (64k) is allocated
- 16 IOMMU pages (16 x 4k) are allocated (16 TCEs used)

It would be enough to use a single TCE for this, so 15 TCEs are
wasted in the process.
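
To make the accounting concrete, here is a minimal userspace sketch (not
kernel code; the ALIGN() macro and the page sizes are assumptions mirroring
the example above) that reproduces the 16-vs-1 TCE count:

#include <stdio.h>

/* Round x up to the next multiple of a (a must be a power of two). */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long page_size = 64 * 1024;    /* system PAGE_SIZE = 64k */
	unsigned long io_page_size = 4 * 1024;  /* IOMMU_PAGE_SIZE() = 4k */
	unsigned long size = 128;               /* requested allocation */

	/* Old behavior: align to PAGE_SIZE, then map every IOMMU page of it */
	unsigned long tces_old = ALIGN(size, page_size) / io_page_size;

	/* New behavior: align only to IOMMU_PAGE_SIZE() */
	unsigned long tces_new = ALIGN(size, io_page_size) / io_page_size;

	printf("TCEs old: %lu, new: %lu\n", tces_old, tces_new); /* 16 vs 1 */
	return 0;
}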

Update iommu_*_coherent() so that the size is aligned only to
IOMMU_PAGE_SIZE() before calling iommu_alloc() and iommu_free().

Also, in iommu_range_alloc(), replace ALIGN(n, 1 << tbl->it_page_shift)
with IOMMU_PAGE_ALIGN(n, tbl), which is easier to read and does the
same thing.
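
For context, IOMMU_PAGE_ALIGN() comes from arch/powerpc/include/asm/iommu.h
and expands along these lines (paraphrased here, so treat it as a sketch
rather than the exact definition), which is why the replacement is
behavior-preserving:

#define IOMMU_PAGE_SIZE(tblptr)        (1UL << (tblptr)->it_page_shift)
#define IOMMU_PAGE_ALIGN(addr, tblptr) ALIGN(addr, IOMMU_PAGE_SIZE(tblptr))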

Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
---
 arch/powerpc/kernel/iommu.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

Comments

Alexey Kardashevskiy Sept. 29, 2020, 3:57 a.m. UTC | #1
On 12/09/2020 03:07, Leonardo Bras wrote:
> [commit message trimmed; quoted in full above]


This seems alright but rather unrelated to the series; it probably makes 
sense to post it separately.

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>


> [remainder of quoted patch trimmed; the full diff follows in the Patch
> section below]

Patch

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 9704f3f76e63..7961645a6980 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -237,10 +237,9 @@ static unsigned long iommu_range_alloc(struct device *dev,
 	}
 
 	if (dev)
-		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-				      1 << tbl->it_page_shift);
+		boundary_size = IOMMU_PAGE_ALIGN(dma_get_seg_boundary(dev) + 1, tbl);
 	else
-		boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);
+		boundary_size = IOMMU_PAGE_ALIGN(1UL << 32, tbl);
 	/* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
 
 	n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
@@ -858,6 +857,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
 	unsigned int order;
 	unsigned int nio_pages, io_order;
 	struct page *page;
+	size_t size_io = size;
 
 	size = PAGE_ALIGN(size);
 	order = get_order(size);
@@ -884,8 +884,9 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
 	memset(ret, 0, size);
 
 	/* Set up tces to cover the allocated range */
-	nio_pages = size >> tbl->it_page_shift;
-	io_order = get_iommu_order(size, tbl);
+	size_io = IOMMU_PAGE_ALIGN(size_io, tbl);
+	nio_pages = size_io >> tbl->it_page_shift;
+	io_order = get_iommu_order(size_io, tbl);
 	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
 			      mask >> tbl->it_page_shift, io_order, 0);
 	if (mapping == DMA_MAPPING_ERROR) {
@@ -900,10 +901,9 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
 			 void *vaddr, dma_addr_t dma_handle)
 {
 	if (tbl) {
-		unsigned int nio_pages;
+		size_t size_io = IOMMU_PAGE_ALIGN(size, tbl);
+		unsigned int nio_pages = size_io >> tbl->it_page_shift;
 
-		size = PAGE_ALIGN(size);
-		nio_pages = size >> tbl->it_page_shift;
 		iommu_free(tbl, dma_handle, nio_pages);
 		size = PAGE_ALIGN(size);
 		free_pages((unsigned long)vaddr, get_order(size));
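
For readability, this is iommu_free_coherent() as it reads with the patch
applied, reconstructed from the hunk above (the closing braces are assumed,
since the hunk context is truncated; comments added for this write-up):

void iommu_free_coherent(struct iommu_table *tbl, size_t size,
			 void *vaddr, dma_addr_t dma_handle)
{
	if (tbl) {
		/* Free the TCEs at IOMMU_PAGE_SIZE() granularity */
		size_t size_io = IOMMU_PAGE_ALIGN(size, tbl);
		unsigned int nio_pages = size_io >> tbl->it_page_shift;

		iommu_free(tbl, dma_handle, nio_pages);
		/* The backing memory is still freed at PAGE_SIZE granularity */
		size = PAGE_ALIGN(size);
		free_pages((unsigned long)vaddr, get_order(size));
	}
}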