
[v1,01/10] powerpc/pseries/iommu: Replace hard-coded page shift

Message ID 20200817234033.442511-2-leobras.c@gmail.com (mailing list archive)
State Superseded, archived
Series DDW indirect mapping

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (97a94d178e5876ad49482c42b13b7296cd6803de)
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 147 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Leonardo Brás Aug. 17, 2020, 11:40 p.m. UTC
Some functions assume IOMMU page size can only be 4K (pageshift == 12).
Update them to accept any page size passed, so we can use 64K pages.

In the process, some defines like TCE_SHIFT were made obsolete, and then
removed. TCE_RPN_MASK was updated to generate a mask according to
the pageshift used.
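
For illustration, with TCE_RPN_BITS = 52 the new macro works out to the
following (worked values only; this is not part of the diff below):

	TCE_RPN_MASK(12) == (1ul << 40) - 1	/* 4K pages: the same 40-bit mask as before */
	TCE_RPN_MASK(16) == (1ul << 36) - 1	/* 64K pages: 36-bit RPN mask */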

Most places had a tbl struct, so using tbl->it_page_shift was simple.
tce_free_pSeriesLP() was a special case, since its callers do not always
have a tbl struct, so adding a tceshift parameter seemed the right thing to do.

Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
---
 arch/powerpc/include/asm/tce.h         | 10 ++----
 arch/powerpc/platforms/pseries/iommu.c | 42 ++++++++++++++++----------
 2 files changed, 28 insertions(+), 24 deletions(-)

Comments

Alexey Kardashevskiy Aug. 22, 2020, 9:33 a.m. UTC | #1
On 18/08/2020 09:40, Leonardo Bras wrote:
> Some functions assume IOMMU page size can only be 4K (pageshift == 12).
> Update them to accept any page size passed, so we can use 64K pages.
> 
> In the process, some defines like TCE_SHIFT were made obsolete, and then
> removed. TCE_RPN_MASK was updated to generate a mask according to
> the pageshift used.
> 
> Most places had a tbl struct, so using tbl->it_page_shift was simple.
> tce_free_pSeriesLP() was a special case, since callers not always have a
> tbl struct, so adding a tceshift parameter seems the right thing to do.
> 
> Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
> ---
>  arch/powerpc/include/asm/tce.h         | 10 ++----
>  arch/powerpc/platforms/pseries/iommu.c | 42 ++++++++++++++++----------
>  2 files changed, 28 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/tce.h b/arch/powerpc/include/asm/tce.h
> index db5fc2f2262d..971cba2d87cc 100644
> --- a/arch/powerpc/include/asm/tce.h
> +++ b/arch/powerpc/include/asm/tce.h
> @@ -19,15 +19,9 @@
>  #define TCE_VB			0
>  #define TCE_PCI			1
>  
> -/* TCE page size is 4096 bytes (1 << 12) */
> -
> -#define TCE_SHIFT	12
> -#define TCE_PAGE_SIZE	(1 << TCE_SHIFT)
> -
>  #define TCE_ENTRY_SIZE		8		/* each TCE is 64 bits */
> -
> -#define TCE_RPN_MASK		0xfffffffffful  /* 40-bit RPN (4K pages) */
> -#define TCE_RPN_SHIFT		12
> +#define TCE_RPN_BITS		52		/* Bits 0-51 represent RPN on TCE */


Ditch this one and use MAX_PHYSMEM_BITS instead? I am pretty sure this
is the actual limit.
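
For reference, that change would presumably end up looking something like
this (just a sketch; whether MAX_PHYSMEM_BITS is visible from this header is
an assumption):

	/* sketch: bound the RPN by the platform memory limit instead of a PAPR-derived 52 */
	#define TCE_RPN_MASK(ps)	((1ul << (MAX_PHYSMEM_BITS - (ps))) - 1)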


> +#define TCE_RPN_MASK(ps)	((1ul << (TCE_RPN_BITS - (ps))) - 1)
>  #define TCE_VALID		0x800		/* TCE valid */
>  #define TCE_ALLIO		0x400		/* TCE valid for all lpars */
>  #define TCE_PCI_WRITE		0x2		/* write from PCI allowed */
> diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
> index e4198700ed1a..8fe23b7dff3a 100644
> --- a/arch/powerpc/platforms/pseries/iommu.c
> +++ b/arch/powerpc/platforms/pseries/iommu.c
> @@ -107,6 +107,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
>  	u64 proto_tce;
>  	__be64 *tcep;
>  	u64 rpn;
> +	const unsigned long tceshift = tbl->it_page_shift;
> +	const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl);
> +	const u64 rpn_mask = TCE_RPN_MASK(tceshift);

Using IOMMU_PAGE_SIZE macro for the page size and not using
IOMMU_PAGE_MASK for the mask - this inconsistency makes my small brain
explode :) I understand the history but maaaaan... Oh well, ok.

Good, otherwise. Thanks,

>  
>  	proto_tce = TCE_PCI_READ; // Read allowed
>  
> @@ -117,10 +120,10 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
>  
>  	while (npages--) {
>  		/* can't move this out since we might cross MEMBLOCK boundary */
> -		rpn = __pa(uaddr) >> TCE_SHIFT;
> -		*tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
> +		rpn = __pa(uaddr) >> tceshift;
> +		*tcep = cpu_to_be64(proto_tce | (rpn & rpn_mask) << tceshift);
>  
> -		uaddr += TCE_PAGE_SIZE;
> +		uaddr += pagesize;
>  		tcep++;
>  	}
>  	return 0;
> @@ -146,7 +149,7 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
>  	return be64_to_cpu(*tcep);
>  }
>  
> -static void tce_free_pSeriesLP(unsigned long liobn, long, long);
> +static void tce_free_pSeriesLP(unsigned long liobn, long, long, long);
>  static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
>  
>  static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
> @@ -159,6 +162,7 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
>  	u64 rpn;
>  	int ret = 0;
>  	long tcenum_start = tcenum, npages_start = npages;
> +	const u64 rpn_mask = TCE_RPN_MASK(tceshift);
>  
>  	rpn = __pa(uaddr) >> tceshift;
>  	proto_tce = TCE_PCI_READ;
> @@ -166,12 +170,12 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
>  		proto_tce |= TCE_PCI_WRITE;
>  
>  	while (npages--) {
> -		tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
> +		tce = proto_tce | (rpn & rpn_mask) << tceshift;
>  		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
>  
>  		if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
>  			ret = (int)rc;
> -			tce_free_pSeriesLP(liobn, tcenum_start,
> +			tce_free_pSeriesLP(liobn, tcenum_start, tceshift,
>  			                   (npages_start - (npages + 1)));
>  			break;
>  		}
> @@ -205,10 +209,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  	long tcenum_start = tcenum, npages_start = npages;
>  	int ret = 0;
>  	unsigned long flags;
> +	const unsigned long tceshift = tbl->it_page_shift;
> +	const u64 rpn_mask = TCE_RPN_MASK(tceshift);
>  
>  	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
>  		return tce_build_pSeriesLP(tbl->it_index, tcenum,
> -					   tbl->it_page_shift, npages, uaddr,
> +					   tceshift, npages, uaddr,
>  		                           direction, attrs);
>  	}
>  
> @@ -225,13 +231,13 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  		if (!tcep) {
>  			local_irq_restore(flags);
>  			return tce_build_pSeriesLP(tbl->it_index, tcenum,
> -					tbl->it_page_shift,
> +					tceshift,
>  					npages, uaddr, direction, attrs);
>  		}
>  		__this_cpu_write(tce_page, tcep);
>  	}
>  
> -	rpn = __pa(uaddr) >> TCE_SHIFT;
> +	rpn = __pa(uaddr) >> tceshift;
>  	proto_tce = TCE_PCI_READ;
>  	if (direction != DMA_TO_DEVICE)
>  		proto_tce |= TCE_PCI_WRITE;
> @@ -245,12 +251,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  		limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);
>  
>  		for (l = 0; l < limit; l++) {
> -			tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
> +			tcep[l] = cpu_to_be64(proto_tce | (rpn & rpn_mask) << tceshift);
>  			rpn++;
>  		}
>  
>  		rc = plpar_tce_put_indirect((u64)tbl->it_index,
> -					    (u64)tcenum << 12,
> +					    (u64)tcenum << tceshift,
>  					    (u64)__pa(tcep),
>  					    limit);
>  
> @@ -277,12 +283,13 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  	return ret;
>  }
>  
> -static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
> +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
> +			       long npages)
>  {
>  	u64 rc;
>  
>  	while (npages--) {
> -		rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);
> +		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0);
>  
>  		if (rc && printk_ratelimit()) {
>  			printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
> @@ -301,9 +308,11 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
>  	u64 rc;
>  
>  	if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
> -		return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
> +		return tce_free_pSeriesLP(tbl->it_index, tcenum,
> +					  tbl->it_page_shift, npages);
>  
> -	rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
> +	rc = plpar_tce_stuff((u64)tbl->it_index,
> +			     (u64)tcenum << tbl->it_page_shift, 0, npages);
>  
>  	if (rc && printk_ratelimit()) {
>  		printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
> @@ -319,7 +328,8 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
>  	u64 rc;
>  	unsigned long tce_ret;
>  
> -	rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret);
> +	rc = plpar_tce_get((u64)tbl->it_index,
> +			   (u64)tcenum << tbl->it_page_shift, &tce_ret);
>  
>  	if (rc && printk_ratelimit()) {
>  		printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
>
Leonardo Brás Aug. 27, 2020, 3:32 p.m. UTC | #2
Hello Alexey, thank you for this feedback!

On Sat, 2020-08-22 at 19:33 +1000, Alexey Kardashevskiy wrote:
> > +#define TCE_RPN_BITS		52		/* Bits 0-51 represent RPN on TCE */
> 
> Ditch this one and use MAX_PHYSMEM_BITS instead? I am pretty sure this
> is the actual limit.

I understand this MAX_PHYSMEM_BITS (51) comes from the maximum physical memory addressable in the machine. IIUC, it means we can access physical addresses up to (1ul << MAX_PHYSMEM_BITS).

This 52 comes from PAPR "Table 9. TCE Definition", which defines bits
0-51 as the RPN. By looking at the code, I understand that it means we may pass any address < (1ul << 52) to a TCE.

In practice, MAX_PHYSMEM_BITS should be enough as of today, because I suppose we can't ever pass a physical page address over
(1ul << 51), and the TCE accepts up to (1ul << 52).
But if we ever increase MAX_PHYSMEM_BITS, it doesn't necessarily mean that TCE_RPN_BITS will also be increased, so I think they are independent values.

Does it make sense? Please let me know if I am missing something.

> 
> 
> > +#define TCE_RPN_MASK(ps)	((1ul << (TCE_RPN_BITS - (ps))) - 1)
> >  #define TCE_VALID		0x800		/* TCE valid */
> >  #define TCE_ALLIO		0x400		/* TCE valid for all lpars */
> >  #define TCE_PCI_WRITE		0x2		/* write from PCI allowed */
> > diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
> > index e4198700ed1a..8fe23b7dff3a 100644
> > --- a/arch/powerpc/platforms/pseries/iommu.c
> > +++ b/arch/powerpc/platforms/pseries/iommu.c
> > @@ -107,6 +107,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
> >  	u64 proto_tce;
> >  	__be64 *tcep;
> >  	u64 rpn;
> > +	const unsigned long tceshift = tbl->it_page_shift;
> > +	const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl);
> > +	const u64 rpn_mask = TCE_RPN_MASK(tceshift);
> 
> Using IOMMU_PAGE_SIZE macro for the page size and not using
> IOMMU_PAGE_MASK for the mask - this inconsistency makes my small brain
> explode :) I understand the history but maaaaan... Oh well, ok.
> 

Yeah, it feels kind of weird after two IOMMU-related consts. :)
But sure, IOMMU_PAGE_MASK() would not be useful here :)

And this kind of got me thinking:
> > +		rpn = __pa(uaddr) >> tceshift;
> > +		*tcep = cpu_to_be64(proto_tce | (rpn & rpn_mask) << tceshift);
Why not:
	rpn_mask = TCE_RPN_MASK(tceshift) << tceshift;

	rpn = __pa(uaddr) & rpn_mask;
	*tcep = cpu_to_be64(proto_tce | rpn);

I am usually afraid of changing stuff like this, but I think it's safe.
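
A minimal sketch of why the two forms should produce the same TCE (names as
in the patch; this only restates the proposal above):

	/* current form in the patch */
	rpn  = __pa(uaddr) >> tceshift;
	tce  = proto_tce | (rpn & TCE_RPN_MASK(tceshift)) << tceshift;

	/* proposed form: pre-shift the mask once, then mask the address directly */
	rpn_mask = TCE_RPN_MASK(tceshift) << tceshift;	/* keeps address bits tceshift..51 */
	tce      = proto_tce | (__pa(uaddr) & rpn_mask);

	/* both keep exactly bits tceshift..51 of __pa(uaddr), so the TCE is identical */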

> Good, otherwise. Thanks,

Thank you for reviewing!
Alexey Kardashevskiy Aug. 28, 2020, 2:27 a.m. UTC | #3
On 28/08/2020 01:32, Leonardo Bras wrote:
> Hello Alexey, thank you for this feedback!
> 
> On Sat, 2020-08-22 at 19:33 +1000, Alexey Kardashevskiy wrote:
>>> +#define TCE_RPN_BITS		52		/* Bits 0-51 represent RPN on TCE */
>>
>> Ditch this one and use MAX_PHYSMEM_BITS instead? I am pretty sure this
>> is the actual limit.
> 
> I understand this MAX_PHYSMEM_BITS(51) comes from the maximum physical memory addressable in the machine. IIUC, it means we can access physical address up to (1ul << MAX_PHYSMEM_BITS). 
> 
> This 52 comes from PAPR "Table 9. TCE Definition" which defines bits
> 0-51 as the RPN. By looking at code, I understand that it means we may input any address < (1ul << 52) to TCE.
> 
> In practice, MAX_PHYSMEM_BITS should be enough as of today, because I suppose we can't ever pass a physical page address over 
> (1ul << 51), and TCE accepts up to (1ul << 52).
> But if we ever increase MAX_PHYSMEM_BITS, it doesn't necessarily means that TCE_RPN_BITS will also be increased, so I think they are independent values. 
> 
> Does it make sense? Please let me know if I am missing something.

The underlying hardware is PHB3/4, about which the IODA2 spec (Version
2.4, 6Apr2012.pdf) says:

"The number of most significant RPN bits implemented in the TCE is
dependent on the max size of System Memory to be supported by the platform".

IODA3 is the same on this matter.

This is MAX_PHYSMEM_BITS, and the PHB itself does not have any other
limits on top of that. So the only real limit comes from
MAX_PHYSMEM_BITS; where exactly TCE_RPN_BITS comes from - I have no idea.


> 
>>
>>
>>> +#define TCE_RPN_MASK(ps)	((1ul << (TCE_RPN_BITS - (ps))) - 1)
>>>  #define TCE_VALID		0x800		/* TCE valid */
>>>  #define TCE_ALLIO		0x400		/* TCE valid for all lpars */
>>>  #define TCE_PCI_WRITE		0x2		/* write from PCI allowed */
>>> diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
>>> index e4198700ed1a..8fe23b7dff3a 100644
>>> --- a/arch/powerpc/platforms/pseries/iommu.c
>>> +++ b/arch/powerpc/platforms/pseries/iommu.c
>>> @@ -107,6 +107,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
>>>  	u64 proto_tce;
>>>  	__be64 *tcep;
>>>  	u64 rpn;
>>> +	const unsigned long tceshift = tbl->it_page_shift;
>>> +	const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl);
>>> +	const u64 rpn_mask = TCE_RPN_MASK(tceshift);
>>
>> Using IOMMU_PAGE_SIZE macro for the page size and not using
>> IOMMU_PAGE_MASK for the mask - this inconsistency makes my small brain
>> explode :) I understand the history but maaaaan... Oh well, ok.
>>
> 
> Yeah, it feels kind of weird after two IOMMU related consts. :)
> But sure IOMMU_PAGE_MASK() would not be useful here :)
> 
> And this kind of let me thinking:
>>> +		rpn = __pa(uaddr) >> tceshift;
>>> +		*tcep = cpu_to_be64(proto_tce | (rpn & rpn_mask) << tceshift);
> Why not:
> 	rpn_mask =  TCE_RPN_MASK(tceshift) << tceshift;


A mask for a page number (but not the address!) hurts my brain; masks
are good against addresses, but numbers should already have all bits
adjusted imho. Maybe it is just me :-/


> 	
> 	rpn = __pa(uaddr) & rpn_mask;
> 	*tcep = cpu_to_be64(proto_tce | rpn)
> 
> I am usually afraid of changing stuff like this, but I think it's safe.
> 
>> Good, otherwise. Thanks,
> 
> Thank you for reviewing!
>  
> 
>
Leonardo Brás Aug. 28, 2020, 7:55 p.m. UTC | #4
On Fri, 2020-08-28 at 12:27 +1000, Alexey Kardashevskiy wrote:
> 
> On 28/08/2020 01:32, Leonardo Bras wrote:
> > Hello Alexey, thank you for this feedback!
> > 
> > On Sat, 2020-08-22 at 19:33 +1000, Alexey Kardashevskiy wrote:
> > > > +#define TCE_RPN_BITS		52		/* Bits 0-51 represent RPN on TCE */
> > > 
> > > Ditch this one and use MAX_PHYSMEM_BITS instead? I am pretty sure this
> > > is the actual limit.
> > 
> > I understand this MAX_PHYSMEM_BITS(51) comes from the maximum physical memory addressable in the machine. IIUC, it means we can access physical address up to (1ul << MAX_PHYSMEM_BITS). 
> > 
> > This 52 comes from PAPR "Table 9. TCE Definition" which defines bits
> > 0-51 as the RPN. By looking at code, I understand that it means we may input any address < (1ul << 52) to TCE.
> > 
> > In practice, MAX_PHYSMEM_BITS should be enough as of today, because I suppose we can't ever pass a physical page address over 
> > (1ul << 51), and TCE accepts up to (1ul << 52).
> > But if we ever increase MAX_PHYSMEM_BITS, it doesn't necessarily means that TCE_RPN_BITS will also be increased, so I think they are independent values. 
> > 
> > Does it make sense? Please let me know if I am missing something.
> 
> The underlying hardware is PHB3/4 about which the IODA2 Version 2.4
> 6Apr2012.pdf spec says:
> 
> "The number of most significant RPN bits implemented in the TCE is
> dependent on the max size of System Memory to be supported by the platform".
> 
> IODA3 is the same on this matter.
> 
> This is MAX_PHYSMEM_BITS and PHB itself does not have any other limits
> on top of that. So the only real limit comes from MAX_PHYSMEM_BITS and
> where TCE_RPN_BITS comes from exactly - I have no idea.

Well, I created this TCE_RPN_BITS = 52 because the previous mask was a
hardcoded 40-bit mask (0xfffffffffful) for a hard-coded 12-bit (4k)
pagesize, and PAPR+/LoPAR also describes the TCE as having bits 0-51
for the RPN, as mentioned before.

Figures 3.4 and 3.5 of IODA3 Revision 3.0_prd1 (OpenPowerFoundation)
show system memory mapping into a TCE, and there too the TCE has bits
0-51 for the RPN (52 bits). "Table 3.6. TCE Definition" also shows it.

In fact, by the looks of those figures, the RPN_MASK should always be a
52-bit mask, and RPN = (page >> tceshift) & RPN_MASK.

Maybe that's it?
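
Rendering that reading into code would give something like this (an
illustration of the sentence above only, not what the patch currently does):

	#define RPN_MASK	((1ul << 52) - 1)	/* bits 0-51, per those IODA3 figures */

	rpn = (__pa(uaddr) >> tceshift) & RPN_MASK;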

> 
> 
> > > 
> > > > +#define TCE_RPN_MASK(ps)	((1ul << (TCE_RPN_BITS - (ps))) - 1)
> > > >  #define TCE_VALID		0x800		/* TCE valid */
> > > >  #define TCE_ALLIO		0x400		/* TCE valid for all lpars */
> > > >  #define TCE_PCI_WRITE		0x2		/* write from PCI allowed */
> > > > diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
> > > > index e4198700ed1a..8fe23b7dff3a 100644
> > > > --- a/arch/powerpc/platforms/pseries/iommu.c
> > > > +++ b/arch/powerpc/platforms/pseries/iommu.c
> > > > @@ -107,6 +107,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
> > > >  	u64 proto_tce;
> > > >  	__be64 *tcep;
> > > >  	u64 rpn;
> > > > +	const unsigned long tceshift = tbl->it_page_shift;
> > > > +	const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl);
> > > > +	const u64 rpn_mask = TCE_RPN_MASK(tceshift);
> > > 
> > > Using IOMMU_PAGE_SIZE macro for the page size and not using
> > > IOMMU_PAGE_MASK for the mask - this inconsistency makes my small brain
> > > explode :) I understand the history but maaaaan... Oh well, ok.
> > > 
> > 
> > Yeah, it feels kind of weird after two IOMMU related consts. :)
> > But sure IOMMU_PAGE_MASK() would not be useful here :)
> > 
> > And this kind of let me thinking:
> > > > +		rpn = __pa(uaddr) >> tceshift;
> > > > +		*tcep = cpu_to_be64(proto_tce | (rpn & rpn_mask) << tceshift);
> > Why not:
> > 	rpn_mask =  TCE_RPN_MASK(tceshift) << tceshift;
> 
> A mask for a page number (but not the address!) hurts my brain, masks
> are good against addresses but numbers should already have all bits
> adjusted imho, may be it is just me :-/
> 
> 
> > 	
> > 	rpn = __pa(uaddr) & rpn_mask;
> > 	*tcep = cpu_to_be64(proto_tce | rpn)
> > 
> > I am usually afraid of changing stuff like this, but I think it's safe.
> > 
> > > Good, otherwise. Thanks,
> > 
> > Thank you for reviewing!
> >  
> > 
> >
Alexey Kardashevskiy Aug. 31, 2020, 12:06 a.m. UTC | #5
On 29/08/2020 05:55, Leonardo Bras wrote:
> On Fri, 2020-08-28 at 12:27 +1000, Alexey Kardashevskiy wrote:
>>
>> On 28/08/2020 01:32, Leonardo Bras wrote:
>>> Hello Alexey, thank you for this feedback!
>>>
>>> On Sat, 2020-08-22 at 19:33 +1000, Alexey Kardashevskiy wrote:
>>>>> +#define TCE_RPN_BITS		52		/* Bits 0-51 represent RPN on TCE */
>>>>
>>>> Ditch this one and use MAX_PHYSMEM_BITS instead? I am pretty sure this
>>>> is the actual limit.
>>>
>>> I understand this MAX_PHYSMEM_BITS(51) comes from the maximum physical memory addressable in the machine. IIUC, it means we can access physical address up to (1ul << MAX_PHYSMEM_BITS). 
>>>
>>> This 52 comes from PAPR "Table 9. TCE Definition" which defines bits
>>> 0-51 as the RPN. By looking at code, I understand that it means we may input any address < (1ul << 52) to TCE.
>>>
>>> In practice, MAX_PHYSMEM_BITS should be enough as of today, because I suppose we can't ever pass a physical page address over 
>>> (1ul << 51), and TCE accepts up to (1ul << 52).
>>> But if we ever increase MAX_PHYSMEM_BITS, it doesn't necessarily means that TCE_RPN_BITS will also be increased, so I think they are independent values. 
>>>
>>> Does it make sense? Please let me know if I am missing something.
>>
>> The underlying hardware is PHB3/4 about which the IODA2 Version 2.4
>> 6Apr2012.pdf spec says:
>>
>> "The number of most significant RPN bits implemented in the TCE is
>> dependent on the max size of System Memory to be supported by the platform".
>>
>> IODA3 is the same on this matter.
>>
>> This is MAX_PHYSMEM_BITS and PHB itself does not have any other limits
>> on top of that. So the only real limit comes from MAX_PHYSMEM_BITS and
>> where TCE_RPN_BITS comes from exactly - I have no idea.
> 
> Well, I created this TCE_RPN_BITS = 52 because the previous mask was a
> hardcoded 40-bit mask (0xfffffffffful), for hard-coded 12-bit (4k)
> pagesize, and on PAPR+/LoPAR also defines TCE as having bits 0-51
> described as RPN, as described before.
> 
> IODA3 Revision 3.0_prd1 (OpenPowerFoundation), Figure 3.4 and 3.5.
> shows system memory mapping into a TCE, and the TCE also has bits 0-51
> for the RPN (52 bits). "Table 3.6. TCE Definition" also shows it.
>> In fact, by the looks of those figures, the RPN_MASK should always be a
> 52-bit mask, and RPN = (page >> tceshift) & RPN_MASK.


I suspect the mask is there in the first place for extra protection
against too-big addresses going to the TCE table (and/or for virtual vs
physical addresses). Using a 52-bit mask makes no sense for anything;
you could just drop the mask and let the C compiler deal with the 64-bit
"uint", as it is basically a 4K page address anywhere in the 64-bit
space. Thanks,


> Maybe that's it?




> 
>>
>>
>>>>
>>>>> +#define TCE_RPN_MASK(ps)	((1ul << (TCE_RPN_BITS - (ps))) - 1)
>>>>>  #define TCE_VALID		0x800		/* TCE valid */
>>>>>  #define TCE_ALLIO		0x400		/* TCE valid for all lpars */
>>>>>  #define TCE_PCI_WRITE		0x2		/* write from PCI allowed */
>>>>> diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
>>>>> index e4198700ed1a..8fe23b7dff3a 100644
>>>>> --- a/arch/powerpc/platforms/pseries/iommu.c
>>>>> +++ b/arch/powerpc/platforms/pseries/iommu.c
>>>>> @@ -107,6 +107,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
>>>>>  	u64 proto_tce;
>>>>>  	__be64 *tcep;
>>>>>  	u64 rpn;
>>>>> +	const unsigned long tceshift = tbl->it_page_shift;
>>>>> +	const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl);
>>>>> +	const u64 rpn_mask = TCE_RPN_MASK(tceshift);
>>>>
>>>> Using IOMMU_PAGE_SIZE macro for the page size and not using
>>>> IOMMU_PAGE_MASK for the mask - this inconsistency makes my small brain
>>>> explode :) I understand the history but maaaaan... Oh well, ok.
>>>>
>>>
>>> Yeah, it feels kind of weird after two IOMMU related consts. :)
>>> But sure IOMMU_PAGE_MASK() would not be useful here :)
>>>
>>> And this kind of let me thinking:
>>>>> +		rpn = __pa(uaddr) >> tceshift;
>>>>> +		*tcep = cpu_to_be64(proto_tce | (rpn & rpn_mask) << tceshift);
>>> Why not:
>>> 	rpn_mask =  TCE_RPN_MASK(tceshift) << tceshift;
>>
>> A mask for a page number (but not the address!) hurts my brain, masks
>> are good against addresses but numbers should already have all bits
>> adjusted imho, may be it is just me :-/
>>
>>
>>> 	
>>> 	rpn = __pa(uaddr) & rpn_mask;
>>> 	*tcep = cpu_to_be64(proto_tce | rpn)
>>>
>>> I am usually afraid of changing stuff like this, but I think it's safe.
>>>
>>>> Good, otherwise. Thanks,
>>>
>>> Thank you for reviewing!
>>>  
>>>
>>>
>
Oliver O'Halloran Aug. 31, 2020, 1:41 a.m. UTC | #6
On Mon, Aug 31, 2020 at 10:08 AM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> On 29/08/2020 05:55, Leonardo Bras wrote:
> > On Fri, 2020-08-28 at 12:27 +1000, Alexey Kardashevskiy wrote:
> >>
> >> On 28/08/2020 01:32, Leonardo Bras wrote:
> >>> Hello Alexey, thank you for this feedback!
> >>>
> >>> On Sat, 2020-08-22 at 19:33 +1000, Alexey Kardashevskiy wrote:
> >>>>> +#define TCE_RPN_BITS             52              /* Bits 0-51 represent RPN on TCE */
> >>>>
> >>>> Ditch this one and use MAX_PHYSMEM_BITS instead? I am pretty sure this
> >>>> is the actual limit.
> >>>
> >>> I understand this MAX_PHYSMEM_BITS(51) comes from the maximum physical memory addressable in the machine. IIUC, it means we can access physical address up to (1ul << MAX_PHYSMEM_BITS).
> >>>
> >>> This 52 comes from PAPR "Table 9. TCE Definition" which defines bits
> >>> 0-51 as the RPN. By looking at code, I understand that it means we may input any address < (1ul << 52) to TCE.
> >>>
> >>> In practice, MAX_PHYSMEM_BITS should be enough as of today, because I suppose we can't ever pass a physical page address over
> >>> (1ul << 51), and TCE accepts up to (1ul << 52).
> >>> But if we ever increase MAX_PHYSMEM_BITS, it doesn't necessarily means that TCE_RPN_BITS will also be increased, so I think they are independent values.
> >>>
> >>> Does it make sense? Please let me know if I am missing something.
> >>
> >> The underlying hardware is PHB3/4 about which the IODA2 Version 2.4
> >> 6Apr2012.pdf spec says:
> >>
> >> "The number of most significant RPN bits implemented in the TCE is
> >> dependent on the max size of System Memory to be supported by the platform".
> >>
> >> IODA3 is the same on this matter.
> >>
> >> This is MAX_PHYSMEM_BITS and PHB itself does not have any other limits
> >> on top of that. So the only real limit comes from MAX_PHYSMEM_BITS and
> >> where TCE_RPN_BITS comes from exactly - I have no idea.
> >
> > Well, I created this TCE_RPN_BITS = 52 because the previous mask was a
> > hardcoded 40-bit mask (0xfffffffffful), for hard-coded 12-bit (4k)
> > pagesize, and on PAPR+/LoPAR also defines TCE as having bits 0-51
> > described as RPN, as described before.
> >
> > IODA3 Revision 3.0_prd1 (OpenPowerFoundation), Figure 3.4 and 3.5.
> > shows system memory mapping into a TCE, and the TCE also has bits 0-51
> > for the RPN (52 bits). "Table 3.6. TCE Definition" also shows it.
> >> In fact, by the looks of those figures, the RPN_MASK should always be a
> > 52-bit mask, and RPN = (page >> tceshift) & RPN_MASK.
>
> I suspect the mask is there in the first place for extra protection
> against too big addresses going to the TCE table (or/and for virtual vs
> physical addresses). Using 52bit mask makes no sense for anything, you
> could just drop the mask and let c compiler deal with 64bit "uint" as it
> is basically a 4K page address anywhere in the 64bit space. Thanks,

Assuming 4K pages, you need 52 RPN bits to cover the whole 64-bit
physical address space. The IODA3 spec does explicitly say the upper
bits are optional and the implementation only needs to support enough
to cover up to the physical address limit, which is 56 bits on P9 /
PHB4. If you want to validate that the address will fit inside
MAX_PHYSMEM_BITS then fine, but I think that should be done as a
WARN_ON or similar rather than just silently masking off the bits.
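
One possible shape of that check, as a sketch of the idea only (not code
from any revision of the patch):

	/* warn instead of silently masking off high RPN bits */
	rpn = __pa(uaddr) >> tceshift;
	if (WARN_ON_ONCE(rpn >> (MAX_PHYSMEM_BITS - tceshift)))
		return -EINVAL;	/* illustrative error handling only */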
Alexey Kardashevskiy Aug. 31, 2020, 3:48 a.m. UTC | #7
On 31/08/2020 11:41, Oliver O'Halloran wrote:
> On Mon, Aug 31, 2020 at 10:08 AM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>> On 29/08/2020 05:55, Leonardo Bras wrote:
>>> On Fri, 2020-08-28 at 12:27 +1000, Alexey Kardashevskiy wrote:
>>>>
>>>> On 28/08/2020 01:32, Leonardo Bras wrote:
>>>>> Hello Alexey, thank you for this feedback!
>>>>>
>>>>> On Sat, 2020-08-22 at 19:33 +1000, Alexey Kardashevskiy wrote:
>>>>>>> +#define TCE_RPN_BITS             52              /* Bits 0-51 represent RPN on TCE */
>>>>>>
>>>>>> Ditch this one and use MAX_PHYSMEM_BITS instead? I am pretty sure this
>>>>>> is the actual limit.
>>>>>
>>>>> I understand this MAX_PHYSMEM_BITS(51) comes from the maximum physical memory addressable in the machine. IIUC, it means we can access physical address up to (1ul << MAX_PHYSMEM_BITS).
>>>>>
>>>>> This 52 comes from PAPR "Table 9. TCE Definition" which defines bits
>>>>> 0-51 as the RPN. By looking at code, I understand that it means we may input any address < (1ul << 52) to TCE.
>>>>>
>>>>> In practice, MAX_PHYSMEM_BITS should be enough as of today, because I suppose we can't ever pass a physical page address over
>>>>> (1ul << 51), and TCE accepts up to (1ul << 52).
>>>>> But if we ever increase MAX_PHYSMEM_BITS, it doesn't necessarily means that TCE_RPN_BITS will also be increased, so I think they are independent values.
>>>>>
>>>>> Does it make sense? Please let me know if I am missing something.
>>>>
>>>> The underlying hardware is PHB3/4 about which the IODA2 Version 2.4
>>>> 6Apr2012.pdf spec says:
>>>>
>>>> "The number of most significant RPN bits implemented in the TCE is
>>>> dependent on the max size of System Memory to be supported by the platform".
>>>>
>>>> IODA3 is the same on this matter.
>>>>
>>>> This is MAX_PHYSMEM_BITS and PHB itself does not have any other limits
>>>> on top of that. So the only real limit comes from MAX_PHYSMEM_BITS and
>>>> where TCE_RPN_BITS comes from exactly - I have no idea.
>>>
>>> Well, I created this TCE_RPN_BITS = 52 because the previous mask was a
>>> hardcoded 40-bit mask (0xfffffffffful), for hard-coded 12-bit (4k)
>>> pagesize, and on PAPR+/LoPAR also defines TCE as having bits 0-51
>>> described as RPN, as described before.
>>>
>>> IODA3 Revision 3.0_prd1 (OpenPowerFoundation), Figure 3.4 and 3.5.
>>> shows system memory mapping into a TCE, and the TCE also has bits 0-51
>>> for the RPN (52 bits). "Table 3.6. TCE Definition" also shows it.
>>>> In fact, by the looks of those figures, the RPN_MASK should always be a
>>> 52-bit mask, and RPN = (page >> tceshift) & RPN_MASK.
>>
>> I suspect the mask is there in the first place for extra protection
>> against too big addresses going to the TCE table (or/and for virtual vs
>> physical addresses). Using 52bit mask makes no sense for anything, you
>> could just drop the mask and let c compiler deal with 64bit "uint" as it
>> is basically a 4K page address anywhere in the 64bit space. Thanks,
> 
> Assuming 4K pages you need 52 RPN bits to cover the whole 64bit
> physical address space. The IODA3 spec does explicitly say the upper
> bits are optional and the implementation only needs to support enough
> to cover up to the physical address limit, which is 56bits of P9 /
> PHB4. If you want to validate that the address will fit inside of
> MAX_PHYSMEM_BITS then fine, but I think that should be done as a
> WARN_ON or similar rather than just silently masking off the bits.

We can do this, and probably should anyway, but I am also pretty sure
we can just ditch the mask and have the hypervisor return an error,
which will show up in dmesg.
Leonardo Brás Sept. 1, 2020, 9:38 p.m. UTC | #8
On Mon, 2020-08-31 at 13:48 +1000, Alexey Kardashevskiy wrote:
> > > > Well, I created this TCE_RPN_BITS = 52 because the previous mask was a
> > > > hardcoded 40-bit mask (0xfffffffffful), for hard-coded 12-bit (4k)
> > > > pagesize, and on PAPR+/LoPAR also defines TCE as having bits 0-51
> > > > described as RPN, as described before.
> > > > 
> > > > IODA3 Revision 3.0_prd1 (OpenPowerFoundation), Figure 3.4 and 3.5.
> > > > shows system memory mapping into a TCE, and the TCE also has bits 0-51
> > > > for the RPN (52 bits). "Table 3.6. TCE Definition" also shows it.
> > > > In fact, by the looks of those figures, the RPN_MASK should always be a
> > > > 52-bit mask, and RPN = (page >> tceshift) & RPN_MASK.
> > > 
> > > I suspect the mask is there in the first place for extra protection
> > > against too big addresses going to the TCE table (or/and for virtual vs
> > > physical addresses). Using 52bit mask makes no sense for anything, you
> > > could just drop the mask and let c compiler deal with 64bit "uint" as it
> > > is basically a 4K page address anywhere in the 64bit space. Thanks,
> > 
> > Assuming 4K pages you need 52 RPN bits to cover the whole 64bit
> > physical address space. The IODA3 spec does explicitly say the upper
> > bits are optional and the implementation only needs to support enough
> > to cover up to the physical address limit, which is 56bits of P9 /
> > PHB4. If you want to validate that the address will fit inside of
> > MAX_PHYSMEM_BITS then fine, but I think that should be done as a
> > WARN_ON or similar rather than just silently masking off the bits.
> 
> We can do this and probably should anyway but I am also pretty sure we
> can just ditch the mask and have the hypervisor return an error which
> will show up in dmesg.

Ok then, ditching the mask.
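
For reference, without the mask the per-page store would presumably collapse
to something like this (a sketch of the direction agreed on here; the actual
follow-up may differ):

	rpn   = __pa(uaddr) >> tceshift;
	*tcep = cpu_to_be64(proto_tce | rpn << tceshift);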
Thanks!
Alexey Kardashevskiy Sept. 3, 2020, 4:26 a.m. UTC | #9
On 02/09/2020 07:38, Leonardo Bras wrote:
> On Mon, 2020-08-31 at 13:48 +1000, Alexey Kardashevskiy wrote:
>>>>> Well, I created this TCE_RPN_BITS = 52 because the previous mask was a
>>>>> hardcoded 40-bit mask (0xfffffffffful), for hard-coded 12-bit (4k)
>>>>> pagesize, and on PAPR+/LoPAR also defines TCE as having bits 0-51
>>>>> described as RPN, as described before.
>>>>>
>>>>> IODA3 Revision 3.0_prd1 (OpenPowerFoundation), Figure 3.4 and 3.5.
>>>>> shows system memory mapping into a TCE, and the TCE also has bits 0-51
>>>>> for the RPN (52 bits). "Table 3.6. TCE Definition" also shows it.
>>>>> In fact, by the looks of those figures, the RPN_MASK should always be a
>>>>> 52-bit mask, and RPN = (page >> tceshift) & RPN_MASK.
>>>>
>>>> I suspect the mask is there in the first place for extra protection
>>>> against too big addresses going to the TCE table (or/and for virtual vs
>>>> physical addresses). Using 52bit mask makes no sense for anything, you
>>>> could just drop the mask and let c compiler deal with 64bit "uint" as it
>>>> is basically a 4K page address anywhere in the 64bit space. Thanks,
>>>
>>> Assuming 4K pages you need 52 RPN bits to cover the whole 64bit
>>> physical address space. The IODA3 spec does explicitly say the upper
>>> bits are optional and the implementation only needs to support enough
>>> to cover up to the physical address limit, which is 56bits of P9 /
>>> PHB4. If you want to validate that the address will fit inside of
>>> MAX_PHYSMEM_BITS then fine, but I think that should be done as a
>>> WARN_ON or similar rather than just silently masking off the bits.
>>
>> We can do this and probably should anyway but I am also pretty sure we
>> can just ditch the mask and have the hypervisor return an error which
>> will show up in dmesg.
> 
> Ok then, ditching the mask.


Well, you could run a little experiment and set some bits above that old 
mask and see how phyp reacts :)


> Thanks!
>

Patch

diff --git a/arch/powerpc/include/asm/tce.h b/arch/powerpc/include/asm/tce.h
index db5fc2f2262d..971cba2d87cc 100644
--- a/arch/powerpc/include/asm/tce.h
+++ b/arch/powerpc/include/asm/tce.h
@@ -19,15 +19,9 @@ 
 #define TCE_VB			0
 #define TCE_PCI			1
 
-/* TCE page size is 4096 bytes (1 << 12) */
-
-#define TCE_SHIFT	12
-#define TCE_PAGE_SIZE	(1 << TCE_SHIFT)
-
 #define TCE_ENTRY_SIZE		8		/* each TCE is 64 bits */
-
-#define TCE_RPN_MASK		0xfffffffffful  /* 40-bit RPN (4K pages) */
-#define TCE_RPN_SHIFT		12
+#define TCE_RPN_BITS		52		/* Bits 0-51 represent RPN on TCE */
+#define TCE_RPN_MASK(ps)	((1ul << (TCE_RPN_BITS - (ps))) - 1)
 #define TCE_VALID		0x800		/* TCE valid */
 #define TCE_ALLIO		0x400		/* TCE valid for all lpars */
 #define TCE_PCI_WRITE		0x2		/* write from PCI allowed */
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index e4198700ed1a..8fe23b7dff3a 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -107,6 +107,9 @@  static int tce_build_pSeries(struct iommu_table *tbl, long index,
 	u64 proto_tce;
 	__be64 *tcep;
 	u64 rpn;
+	const unsigned long tceshift = tbl->it_page_shift;
+	const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl);
+	const u64 rpn_mask = TCE_RPN_MASK(tceshift);
 
 	proto_tce = TCE_PCI_READ; // Read allowed
 
@@ -117,10 +120,10 @@  static int tce_build_pSeries(struct iommu_table *tbl, long index,
 
 	while (npages--) {
 		/* can't move this out since we might cross MEMBLOCK boundary */
-		rpn = __pa(uaddr) >> TCE_SHIFT;
-		*tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
+		rpn = __pa(uaddr) >> tceshift;
+		*tcep = cpu_to_be64(proto_tce | (rpn & rpn_mask) << tceshift);
 
-		uaddr += TCE_PAGE_SIZE;
+		uaddr += pagesize;
 		tcep++;
 	}
 	return 0;
@@ -146,7 +149,7 @@  static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
 	return be64_to_cpu(*tcep);
 }
 
-static void tce_free_pSeriesLP(unsigned long liobn, long, long);
+static void tce_free_pSeriesLP(unsigned long liobn, long, long, long);
 static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
 
 static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
@@ -159,6 +162,7 @@  static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
 	u64 rpn;
 	int ret = 0;
 	long tcenum_start = tcenum, npages_start = npages;
+	const u64 rpn_mask = TCE_RPN_MASK(tceshift);
 
 	rpn = __pa(uaddr) >> tceshift;
 	proto_tce = TCE_PCI_READ;
@@ -166,12 +170,12 @@  static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
 		proto_tce |= TCE_PCI_WRITE;
 
 	while (npages--) {
-		tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
+		tce = proto_tce | (rpn & rpn_mask) << tceshift;
 		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
 
 		if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
 			ret = (int)rc;
-			tce_free_pSeriesLP(liobn, tcenum_start,
+			tce_free_pSeriesLP(liobn, tcenum_start, tceshift,
 			                   (npages_start - (npages + 1)));
 			break;
 		}
@@ -205,10 +209,12 @@  static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 	long tcenum_start = tcenum, npages_start = npages;
 	int ret = 0;
 	unsigned long flags;
+	const unsigned long tceshift = tbl->it_page_shift;
+	const u64 rpn_mask = TCE_RPN_MASK(tceshift);
 
 	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
 		return tce_build_pSeriesLP(tbl->it_index, tcenum,
-					   tbl->it_page_shift, npages, uaddr,
+					   tceshift, npages, uaddr,
 		                           direction, attrs);
 	}
 
@@ -225,13 +231,13 @@  static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		if (!tcep) {
 			local_irq_restore(flags);
 			return tce_build_pSeriesLP(tbl->it_index, tcenum,
-					tbl->it_page_shift,
+					tceshift,
 					npages, uaddr, direction, attrs);
 		}
 		__this_cpu_write(tce_page, tcep);
 	}
 
-	rpn = __pa(uaddr) >> TCE_SHIFT;
+	rpn = __pa(uaddr) >> tceshift;
 	proto_tce = TCE_PCI_READ;
 	if (direction != DMA_TO_DEVICE)
 		proto_tce |= TCE_PCI_WRITE;
@@ -245,12 +251,12 @@  static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);
 
 		for (l = 0; l < limit; l++) {
-			tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
+			tcep[l] = cpu_to_be64(proto_tce | (rpn & rpn_mask) << tceshift);
 			rpn++;
 		}
 
 		rc = plpar_tce_put_indirect((u64)tbl->it_index,
-					    (u64)tcenum << 12,
+					    (u64)tcenum << tceshift,
 					    (u64)__pa(tcep),
 					    limit);
 
@@ -277,12 +283,13 @@  static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 	return ret;
 }
 
-static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
+static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
+			       long npages)
 {
 	u64 rc;
 
 	while (npages--) {
-		rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);
+		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0);
 
 		if (rc && printk_ratelimit()) {
 			printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
@@ -301,9 +308,11 @@  static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
 	u64 rc;
 
 	if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
-		return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
+		return tce_free_pSeriesLP(tbl->it_index, tcenum,
+					  tbl->it_page_shift, npages);
 
-	rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
+	rc = plpar_tce_stuff((u64)tbl->it_index,
+			     (u64)tcenum << tbl->it_page_shift, 0, npages);
 
 	if (rc && printk_ratelimit()) {
 		printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
@@ -319,7 +328,8 @@  static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
 	u64 rc;
 	unsigned long tce_ret;
 
-	rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret);
+	rc = plpar_tce_get((u64)tbl->it_index,
+			   (u64)tcenum << tbl->it_page_shift, &tce_ret);
 
 	if (rc && printk_ratelimit()) {
 		printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);