@@ -1099,6 +1099,33 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
ret);
}
+/* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */
+static int iommu_get_page_shift(u32 query_page_size)
+{
+ /* Supported IO page-sizes according to LoPAR */
+ const int shift[] = {
+ __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M),
+ __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M),
+ __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G)
+ };
+
+ int i = ARRAY_SIZE(shift) - 1;
+
+ /*
+ * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field:
+ * - bit 31 means 4k pages are supported,
+ * - bit 30 means 64k pages are supported, and so on.
+ * Larger pagesizes map more memory with the same amount of TCEs, so start probing them.
+ */
+ for (; i >= 0 ; i--) {
+ if (query_page_size & (1 << i))
+ return shift[i];
+ }
+
+ /* No valid page size found. */
+ return 0;
+}
+
/*
* If the PE supports dynamic dma windows, and there is space for a table
* that can map all pages in a linear offset, then setup such a table,
@@ -1206,13 +1233,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
goto out_failed;
}
}
- if (query.page_size & 4) {
- page_shift = 24; /* 16MB */
- } else if (query.page_size & 2) {
- page_shift = 16; /* 64kB */
- } else if (query.page_size & 1) {
- page_shift = 12; /* 4kB */
- } else {
+
+ page_shift = iommu_get_page_shift(query.page_size);
+ if (!page_shift) {
dev_dbg(&dev->dev, "no supported direct page size in mask %x",
query.page_size);
goto out_failed;
According to LoPAR, ibm,query-pe-dma-window output named "IO Page Sizes" will let the OS know all possible pagesizes that can be used for creating a new DDW. Currently Linux will only try using 3 of the 8 available options: 4K, 64K and 16M. According to LoPAR, Hypervisor may also offer 32M, 64M, 128M, 256M and 16G. Enabling bigger pages would be interesting for direct mapping systems with a lot of RAM, while using less TCE entries. Signed-off-by: Leonardo Bras <leobras.c@gmail.com> --- Changes since v2: - Restore 'int array & shift' strategy - Remove defines for RTAS "IO Page Size" output of ibm,query-pe-dma-window - Added/Improved comments Link: http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20210407195613.131140-1-leobras.c@gmail.com/ Changes since v1: - Remove page shift defines, replace by __builtin_ctzll(SZ_XXX) - Add bit field defines for RTAS "IO Page Shift" output of ibm,query-pe-dma-window - Use struct array instead of int array to be more explicit on pagesizes Link: http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20210322190943.715368-1-leobras.c@gmail.com/ arch/powerpc/platforms/pseries/iommu.c | 37 +++++++++++++++++++++----- 1 file changed, 30 insertions(+), 7 deletions(-)