diff mbox series

[v1,4/7] arm64/pgtable: support __HAVE_ARCH_PTE_SWP_EXCLUSIVE

Message ID 20220315141837.137118-5-david@redhat.com (mailing list archive)
State Superseded
Headers show
Series mm: COW fixes part 3: reliable GUP R/W FOLL_GET of anonymous pages | expand

Commit Message

David Hildenbrand March 15, 2022, 2:18 p.m. UTC
Let's use one of the type bits: core-mm only supports 5, so there is no
need to consume 6.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/arm64/include/asm/pgtable-prot.h |  1 +
 arch/arm64/include/asm/pgtable.h      | 23 ++++++++++++++++++++---
 2 files changed, 21 insertions(+), 3 deletions(-)

Comments

Catalin Marinas March 16, 2022, 6:27 p.m. UTC | #1
On Tue, Mar 15, 2022 at 03:18:34PM +0100, David Hildenbrand wrote:
> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index b1e1b74d993c..62e0ebeed720 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -14,6 +14,7 @@
>   * Software defined PTE bits definition.
>   */
>  #define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
> +#define PTE_SWP_EXCLUSIVE	(_AT(pteval_t, 1) << 2)	 /* only for swp ptes */

I think we can use bit 1 here.

> @@ -909,12 +925,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>  /*
>   * Encode and decode a swap entry:
>   *	bits 0-1:	present (must be zero)
> - *	bits 2-7:	swap type
> + *	bits 2:		remember PG_anon_exclusive
> + *	bits 3-7:	swap type
>   *	bits 8-57:	swap offset
>   *	bit  58:	PTE_PROT_NONE (must be zero)

I don't remember exactly why we reserved bits 0 and 1 when, from the
hardware perspective, it's sufficient for bit 0 to be 0 and the whole
pte becomes invalid. We use bit 1 as the 'table' bit (when 0 at pmd
level, it's a huge page) but we shouldn't check for this on a swap
entry.
David Hildenbrand March 17, 2022, 10:04 a.m. UTC | #2
On 16.03.22 19:27, Catalin Marinas wrote:
> On Tue, Mar 15, 2022 at 03:18:34PM +0100, David Hildenbrand wrote:
>> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
>> index b1e1b74d993c..62e0ebeed720 100644
>> --- a/arch/arm64/include/asm/pgtable-prot.h
>> +++ b/arch/arm64/include/asm/pgtable-prot.h
>> @@ -14,6 +14,7 @@
>>   * Software defined PTE bits definition.
>>   */
>>  #define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
>> +#define PTE_SWP_EXCLUSIVE	(_AT(pteval_t, 1) << 2)	 /* only for swp ptes */
> 
> I think we can use bit 1 here.
> 
>> @@ -909,12 +925,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>>  /*
>>   * Encode and decode a swap entry:
>>   *	bits 0-1:	present (must be zero)
>> - *	bits 2-7:	swap type
>> + *	bits 2:		remember PG_anon_exclusive
>> + *	bits 3-7:	swap type
>>   *	bits 8-57:	swap offset
>>   *	bit  58:	PTE_PROT_NONE (must be zero)
> 
> I don't remember exactly why we reserved bits 0 and 1 when, from the
> hardware perspective, it's sufficient for bit 0 to be 0 and the whole
> pte becomes invalid. We use bit 1 as the 'table' bit (when 0 at pmd
> level, it's a huge page) but we shouldn't check for this on a swap
> entry.

You mean

arch/arm64/include/asm/pgtable-hwdef.h:#define PTE_TABLE_BIT            (_AT(pteval_t, 1) << 1)

right?

I wonder why it even exists, for arm64 I only spot:

arch/arm64/include/asm/pgtable.h:#define pte_mkhuge(pte)                (__pte(pte_val(pte) & ~PTE_TABLE_BIT))

I don't really see code that sets PTE_TABLE_BIT.

Similarly, I don't see code that sets PMD_TABLE_BIT/PUD_TABLE_BIT/P4D_TABLE_BIT.
Most probably setting code is not using the defines,  that's why I'm not finding it.
Catalin Marinas March 17, 2022, 5:58 p.m. UTC | #3
On Thu, Mar 17, 2022 at 11:04:18AM +0100, David Hildenbrand wrote:
> On 16.03.22 19:27, Catalin Marinas wrote:
> > On Tue, Mar 15, 2022 at 03:18:34PM +0100, David Hildenbrand wrote:
> >> @@ -909,12 +925,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
> >>  /*
> >>   * Encode and decode a swap entry:
> >>   *	bits 0-1:	present (must be zero)
> >> - *	bits 2-7:	swap type
> >> + *	bits 2:		remember PG_anon_exclusive
> >> + *	bits 3-7:	swap type
> >>   *	bits 8-57:	swap offset
> >>   *	bit  58:	PTE_PROT_NONE (must be zero)
> > 
> > I don't remember exactly why we reserved bits 0 and 1 when, from the
> > hardware perspective, it's sufficient for bit 0 to be 0 and the whole
> > pte becomes invalid. We use bit 1 as the 'table' bit (when 0 at pmd
> > level, it's a huge page) but we shouldn't check for this on a swap
> > entry.
> 
> You mean
> 
> arch/arm64/include/asm/pgtable-hwdef.h:#define PTE_TABLE_BIT            (_AT(pteval_t, 1) << 1)
> 
> right?

Yes.

> I wonder why it even exists, for arm64 I only spot:
> 
> arch/arm64/include/asm/pgtable.h:#define pte_mkhuge(pte)                (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
> 
> I don't really see code that sets PTE_TABLE_BIT.
> 
> Similarly, I don't see code that sets PMD_TABLE_BIT/PUD_TABLE_BIT/P4D_TABLE_BIT.
> Most probably setting code is not using the defines,  that's why I'm not finding it.

It gets set as part of P*D_TYPE_TABLE via p*d_populate(). We use the
P*D_TABLE_BIT mostly for checking whether it's a huge page or not (the
arm64 hugetlbpage.c code).
David Hildenbrand March 18, 2022, 9:59 a.m. UTC | #4
On 17.03.22 18:58, Catalin Marinas wrote:
> On Thu, Mar 17, 2022 at 11:04:18AM +0100, David Hildenbrand wrote:
>> On 16.03.22 19:27, Catalin Marinas wrote:
>>> On Tue, Mar 15, 2022 at 03:18:34PM +0100, David Hildenbrand wrote:
>>>> @@ -909,12 +925,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>>>>  /*
>>>>   * Encode and decode a swap entry:
>>>>   *	bits 0-1:	present (must be zero)
>>>> - *	bits 2-7:	swap type
>>>> + *	bits 2:		remember PG_anon_exclusive
>>>> + *	bits 3-7:	swap type
>>>>   *	bits 8-57:	swap offset
>>>>   *	bit  58:	PTE_PROT_NONE (must be zero)
>>>
>>> I don't remember exactly why we reserved bits 0 and 1 when, from the
>>> hardware perspective, it's sufficient for bit 0 to be 0 and the whole
>>> pte becomes invalid. We use bit 1 as the 'table' bit (when 0 at pmd
>>> level, it's a huge page) but we shouldn't check for this on a swap
>>> entry.
>>
>> You mean
>>
>> arch/arm64/include/asm/pgtable-hwdef.h:#define PTE_TABLE_BIT            (_AT(pteval_t, 1) << 1)
>>
>> right?
> 
> Yes.
> 
>> I wonder why it even exists, for arm64 I only spot:
>>
>> arch/arm64/include/asm/pgtable.h:#define pte_mkhuge(pte)                (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
>>
>> I don't really see code that sets PTE_TABLE_BIT.
>>
>> Similarly, I don't see code that sets PMD_TABLE_BIT/PUD_TABLE_BIT/P4D_TABLE_BIT.
>> Most probably setting code is not using the defines,  that's why I'm not finding it.
> 
> It gets set as part of P*D_TYPE_TABLE via p*d_populate(). We use the
> P*D_TABLE_BIT mostly for checking whether it's a huge page or not (the
> arm64 hugetlbpage.c code).
> 

Makes sense, after digging into the arm arm, I agree that it should
be safe to reuse bit 1. I'll use this (yet untested) patch in v2:


From a48d08339574b7c42e0b032f0fc334872591744c Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Thu, 17 Mar 2022 11:46:26 +0100
Subject: [PATCH] arm64/pgtable: support __HAVE_ARCH_PTE_SWP_EXCLUSIVE

Let's use bit 1, which should be irrelevant if the PTE is marked invalid
eiher way --  we really only care about bit 0.

Note that one alternative would be using one of the type bits: core-mm only
supports 5 bits, so there is no need to reserve space for 6.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/arm64/include/asm/pgtable-prot.h |  1 +
 arch/arm64/include/asm/pgtable.h      | 19 ++++++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index b1e1b74d993c..fd6ddf14c190 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -14,6 +14,7 @@
  * Software defined PTE bits definition.
  */
 #define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
+#define PTE_SWP_EXCLUSIVE	(PTE_TABLE_BIT)		 /* only for swp ptes */
 #define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
 #define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
 #define PTE_DEVMAP		(_AT(pteval_t, 1) << 57)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 94e147e5456c..c78994073cd0 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -402,6 +402,22 @@ static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
 	return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
 }
 
+#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
+}
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+	return pte_val(pte) & PTE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
+}
+
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * See the comment in include/linux/pgtable.h
@@ -908,7 +924,8 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 
 /*
  * Encode and decode a swap entry:
- *	bits 0-1:	present (must be zero)
+ *	bits 0:		present (must be zero)
+ *	bits 1:		remember PG_anon_exclusive
  *	bits 2-7:	swap type
  *	bits 8-57:	swap offset
  *	bit  58:	PTE_PROT_NONE (must be zero)
Catalin Marinas March 18, 2022, 11:33 a.m. UTC | #5
On Fri, Mar 18, 2022 at 10:59:10AM +0100, David Hildenbrand wrote:
> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index b1e1b74d993c..fd6ddf14c190 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -14,6 +14,7 @@
>   * Software defined PTE bits definition.
>   */
>  #define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
> +#define PTE_SWP_EXCLUSIVE	(PTE_TABLE_BIT)		 /* only for swp ptes */
>  #define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
>  #define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
>  #define PTE_DEVMAP		(_AT(pteval_t, 1) << 57)
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 94e147e5456c..c78994073cd0 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -402,6 +402,22 @@ static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
>  	return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
>  }
>  
> +#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
> +static inline pte_t pte_swp_mkexclusive(pte_t pte)
> +{
> +	return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
> +}
> +
> +static inline int pte_swp_exclusive(pte_t pte)
> +{
> +	return pte_val(pte) & PTE_SWP_EXCLUSIVE;
> +}
> +
> +static inline pte_t pte_swp_clear_exclusive(pte_t pte)
> +{
> +	return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
> +}
> +
>  #ifdef CONFIG_NUMA_BALANCING
>  /*
>   * See the comment in include/linux/pgtable.h
> @@ -908,7 +924,8 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>  
>  /*
>   * Encode and decode a swap entry:
> - *	bits 0-1:	present (must be zero)
> + *	bits 0:		present (must be zero)
> + *	bits 1:		remember PG_anon_exclusive

It looks fine to me.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
David Hildenbrand March 18, 2022, 2:14 p.m. UTC | #6
On 18.03.22 12:33, Catalin Marinas wrote:
> On Fri, Mar 18, 2022 at 10:59:10AM +0100, David Hildenbrand wrote:
>> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
>> index b1e1b74d993c..fd6ddf14c190 100644
>> --- a/arch/arm64/include/asm/pgtable-prot.h
>> +++ b/arch/arm64/include/asm/pgtable-prot.h
>> @@ -14,6 +14,7 @@
>>   * Software defined PTE bits definition.
>>   */
>>  #define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
>> +#define PTE_SWP_EXCLUSIVE	(PTE_TABLE_BIT)		 /* only for swp ptes */
>>  #define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
>>  #define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
>>  #define PTE_DEVMAP		(_AT(pteval_t, 1) << 57)
>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
>> index 94e147e5456c..c78994073cd0 100644
>> --- a/arch/arm64/include/asm/pgtable.h
>> +++ b/arch/arm64/include/asm/pgtable.h
>> @@ -402,6 +402,22 @@ static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
>>  	return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
>>  }
>>  
>> +#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
>> +static inline pte_t pte_swp_mkexclusive(pte_t pte)
>> +{
>> +	return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
>> +}
>> +
>> +static inline int pte_swp_exclusive(pte_t pte)
>> +{
>> +	return pte_val(pte) & PTE_SWP_EXCLUSIVE;
>> +}
>> +
>> +static inline pte_t pte_swp_clear_exclusive(pte_t pte)
>> +{
>> +	return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
>> +}
>> +
>>  #ifdef CONFIG_NUMA_BALANCING
>>  /*
>>   * See the comment in include/linux/pgtable.h
>> @@ -908,7 +924,8 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>>  
>>  /*
>>   * Encode and decode a swap entry:
>> - *	bits 0-1:	present (must be zero)
>> + *	bits 0:		present (must be zero)
>> + *	bits 1:		remember PG_anon_exclusive
> 
> It looks fine to me.
> 
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> 

Great, thanks!
Will Deacon March 21, 2022, 2:38 p.m. UTC | #7
On Wed, Mar 16, 2022 at 06:27:01PM +0000, Catalin Marinas wrote:
> On Tue, Mar 15, 2022 at 03:18:34PM +0100, David Hildenbrand wrote:
> > diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> > index b1e1b74d993c..62e0ebeed720 100644
> > --- a/arch/arm64/include/asm/pgtable-prot.h
> > +++ b/arch/arm64/include/asm/pgtable-prot.h
> > @@ -14,6 +14,7 @@
> >   * Software defined PTE bits definition.
> >   */
> >  #define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
> > +#define PTE_SWP_EXCLUSIVE	(_AT(pteval_t, 1) << 2)	 /* only for swp ptes */
> 
> I think we can use bit 1 here.
> 
> > @@ -909,12 +925,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
> >  /*
> >   * Encode and decode a swap entry:
> >   *	bits 0-1:	present (must be zero)
> > - *	bits 2-7:	swap type
> > + *	bits 2:		remember PG_anon_exclusive
> > + *	bits 3-7:	swap type
> >   *	bits 8-57:	swap offset
> >   *	bit  58:	PTE_PROT_NONE (must be zero)
> 
> I don't remember exactly why we reserved bits 0 and 1 when, from the
> hardware perspective, it's sufficient for bit 0 to be 0 and the whole
> pte becomes invalid. We use bit 1 as the 'table' bit (when 0 at pmd
> level, it's a huge page) but we shouldn't check for this on a swap
> entry.

I'm a little worried that when we're dealing with huge mappings at the
PMD level we might lose the ability to distinguish them from a pte-level
mapping with this new flag set if we use bit 1. A similar issue to this
was fixed a long time ago by 59911ca4325d ("ARM64: mm: Move PTE_PROT_NONE
bit") when we used to use bit 1 for PTE_PROT_NONE.

Is something like:

	pmd_to_swp_entry(swp_entry_to_pmd(pmd));

supposed to preserve the original pmd? I'm not sure that's guaranteed
after this change if bit 1 can be cleared in the process -- we could end
up with a pte, which the hardware would interpret as a table entry and
end up with really bad things happening.

Will
Will Deacon March 21, 2022, 2:39 p.m. UTC | #8
On Mon, Mar 21, 2022 at 02:38:02PM +0000, Will Deacon wrote:
> On Wed, Mar 16, 2022 at 06:27:01PM +0000, Catalin Marinas wrote:
> > On Tue, Mar 15, 2022 at 03:18:34PM +0100, David Hildenbrand wrote:
> > > diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> > > index b1e1b74d993c..62e0ebeed720 100644
> > > --- a/arch/arm64/include/asm/pgtable-prot.h
> > > +++ b/arch/arm64/include/asm/pgtable-prot.h
> > > @@ -14,6 +14,7 @@
> > >   * Software defined PTE bits definition.
> > >   */
> > >  #define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
> > > +#define PTE_SWP_EXCLUSIVE	(_AT(pteval_t, 1) << 2)	 /* only for swp ptes */
> > 
> > I think we can use bit 1 here.
> > 
> > > @@ -909,12 +925,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
> > >  /*
> > >   * Encode and decode a swap entry:
> > >   *	bits 0-1:	present (must be zero)
> > > - *	bits 2-7:	swap type
> > > + *	bits 2:		remember PG_anon_exclusive
> > > + *	bits 3-7:	swap type
> > >   *	bits 8-57:	swap offset
> > >   *	bit  58:	PTE_PROT_NONE (must be zero)
> > 
> > I don't remember exactly why we reserved bits 0 and 1 when, from the
> > hardware perspective, it's sufficient for bit 0 to be 0 and the whole
> > pte becomes invalid. We use bit 1 as the 'table' bit (when 0 at pmd
> > level, it's a huge page) but we shouldn't check for this on a swap
> > entry.
> 
> I'm a little worried that when we're dealing with huge mappings at the
> PMD level we might lose the ability to distinguish them from a pte-level
> mapping with this new flag set if we use bit 1. A similar issue to this
> was fixed a long time ago by 59911ca4325d ("ARM64: mm: Move PTE_PROT_NONE
> bit") when we used to use bit 1 for PTE_PROT_NONE.
> 
> Is something like:
> 
> 	pmd_to_swp_entry(swp_entry_to_pmd(pmd));
> 
> supposed to preserve the original pmd? I'm not sure that's guaranteed
> after this change if bit 1 can be cleared in the process -- we could end
> up with a pte, which the hardware would interpret as a table entry and
> end up with really bad things happening.

(I got this back to front: having the bit set rather than cleared would
be an issue, but the overall point remains).

Will
David Hildenbrand March 21, 2022, 3:07 p.m. UTC | #9
On 21.03.22 15:38, Will Deacon wrote:
> On Wed, Mar 16, 2022 at 06:27:01PM +0000, Catalin Marinas wrote:
>> On Tue, Mar 15, 2022 at 03:18:34PM +0100, David Hildenbrand wrote:
>>> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
>>> index b1e1b74d993c..62e0ebeed720 100644
>>> --- a/arch/arm64/include/asm/pgtable-prot.h
>>> +++ b/arch/arm64/include/asm/pgtable-prot.h
>>> @@ -14,6 +14,7 @@
>>>   * Software defined PTE bits definition.
>>>   */
>>>  #define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
>>> +#define PTE_SWP_EXCLUSIVE	(_AT(pteval_t, 1) << 2)	 /* only for swp ptes */
>>
>> I think we can use bit 1 here.
>>
>>> @@ -909,12 +925,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>>>  /*
>>>   * Encode and decode a swap entry:
>>>   *	bits 0-1:	present (must be zero)
>>> - *	bits 2-7:	swap type
>>> + *	bits 2:		remember PG_anon_exclusive
>>> + *	bits 3-7:	swap type
>>>   *	bits 8-57:	swap offset
>>>   *	bit  58:	PTE_PROT_NONE (must be zero)
>>
>> I don't remember exactly why we reserved bits 0 and 1 when, from the
>> hardware perspective, it's sufficient for bit 0 to be 0 and the whole
>> pte becomes invalid. We use bit 1 as the 'table' bit (when 0 at pmd
>> level, it's a huge page) but we shouldn't check for this on a swap
>> entry.
> 
> I'm a little worried that when we're dealing with huge mappings at the
> PMD level we might lose the ability to distinguish them from a pte-level
> mapping with this new flag set if we use bit 1. A similar issue to this
> was fixed a long time ago by 59911ca4325d ("ARM64: mm: Move PTE_PROT_NONE
> bit") when we used to use bit 1 for PTE_PROT_NONE.
> 
> Is something like:
> 
> 	pmd_to_swp_entry(swp_entry_to_pmd(pmd));

Note that __HAVE_ARCH_PTE_SWP_EXCLUSIVE currently only applies to actual
swap entries, not non-swap entries (migration, hwpoison, ...). So it
really only applies to PTEs -- PMDs are not applicable.

So the example you gave cannot possibly have that bit set. From what I
understand, it should be fine. But I have no real preference: I can also
just stick to the original patch, whatever you prefer.

Thanks!
Will Deacon March 21, 2022, 5:44 p.m. UTC | #10
On Mon, Mar 21, 2022 at 04:07:48PM +0100, David Hildenbrand wrote:
> On 21.03.22 15:38, Will Deacon wrote:
> > On Wed, Mar 16, 2022 at 06:27:01PM +0000, Catalin Marinas wrote:
> >> On Tue, Mar 15, 2022 at 03:18:34PM +0100, David Hildenbrand wrote:
> >>> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> >>> index b1e1b74d993c..62e0ebeed720 100644
> >>> --- a/arch/arm64/include/asm/pgtable-prot.h
> >>> +++ b/arch/arm64/include/asm/pgtable-prot.h
> >>> @@ -14,6 +14,7 @@
> >>>   * Software defined PTE bits definition.
> >>>   */
> >>>  #define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
> >>> +#define PTE_SWP_EXCLUSIVE	(_AT(pteval_t, 1) << 2)	 /* only for swp ptes */
> >>
> >> I think we can use bit 1 here.
> >>
> >>> @@ -909,12 +925,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
> >>>  /*
> >>>   * Encode and decode a swap entry:
> >>>   *	bits 0-1:	present (must be zero)
> >>> - *	bits 2-7:	swap type
> >>> + *	bits 2:		remember PG_anon_exclusive
> >>> + *	bits 3-7:	swap type
> >>>   *	bits 8-57:	swap offset
> >>>   *	bit  58:	PTE_PROT_NONE (must be zero)
> >>
> >> I don't remember exactly why we reserved bits 0 and 1 when, from the
> >> hardware perspective, it's sufficient for bit 0 to be 0 and the whole
> >> pte becomes invalid. We use bit 1 as the 'table' bit (when 0 at pmd
> >> level, it's a huge page) but we shouldn't check for this on a swap
> >> entry.
> > 
> > I'm a little worried that when we're dealing with huge mappings at the
> > PMD level we might lose the ability to distinguish them from a pte-level
> > mapping with this new flag set if we use bit 1. A similar issue to this
> > was fixed a long time ago by 59911ca4325d ("ARM64: mm: Move PTE_PROT_NONE
> > bit") when we used to use bit 1 for PTE_PROT_NONE.
> > 
> > Is something like:
> > 
> > 	pmd_to_swp_entry(swp_entry_to_pmd(pmd));
> 
> Note that __HAVE_ARCH_PTE_SWP_EXCLUSIVE currently only applies to actual
> swap entries, not non-swap entries (migration, hwpoison, ...). So it
> really only applies to PTEs -- PMDs are not applicable.

Right, thanks for the clarification.

> So the example you gave cannot possibly have that bit set. From what I
> understand, it should be fine. But I have no real preference: I can also
> just stick to the original patch, whatever you prefer.

I think I'd prefer to stay on the safe side and stick with bit 2 as you
originally proposed. If we need to support crazy numbers of swapfiles
in future then we can revisit the idea of allocating bit 1.

Thanks, and sorry for the trouble.

Will
Catalin Marinas March 21, 2022, 6:27 p.m. UTC | #11
On Mon, Mar 21, 2022 at 05:44:05PM +0000, Will Deacon wrote:
> On Mon, Mar 21, 2022 at 04:07:48PM +0100, David Hildenbrand wrote:
> > So the example you gave cannot possibly have that bit set. From what I
> > understand, it should be fine. But I have no real preference: I can also
> > just stick to the original patch, whatever you prefer.
> 
> I think I'd prefer to stay on the safe side and stick with bit 2 as you
> originally proposed. If we need to support crazy numbers of swapfiles
> in future then we can revisit the idea of allocating bit 1.

Sounds fine to me. David, feel free to keep my reviewed-by on the
original patch.
David Hildenbrand March 22, 2022, 9:46 a.m. UTC | #12
On 21.03.22 19:27, Catalin Marinas wrote:
> On Mon, Mar 21, 2022 at 05:44:05PM +0000, Will Deacon wrote:
>> On Mon, Mar 21, 2022 at 04:07:48PM +0100, David Hildenbrand wrote:
>>> So the example you gave cannot possibly have that bit set. From what I
>>> understand, it should be fine. But I have no real preference: I can also
>>> just stick to the original patch, whatever you prefer.
>>
>> I think I'd prefer to stay on the safe side and stick with bit 2 as you
>> originally proposed. If we need to support crazy numbers of swapfiles
>> in future then we can revisit the idea of allocating bit 1.
> 
> Sounds fine to me. David, feel free to keep my reviewed-by on the
> original patch.
> 

Thanks both, I'll add the following comment to the patch:

"Note that we might be able to reuse bit 1, but reusing bit 1 turned out
 problematic in the past for PROT_NONE handling; so let's play safe and
 use another bit."
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index b1e1b74d993c..62e0ebeed720 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -14,6 +14,7 @@ 
  * Software defined PTE bits definition.
  */
 #define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
+#define PTE_SWP_EXCLUSIVE	(_AT(pteval_t, 1) << 2)	 /* only for swp ptes */
 #define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
 #define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
 #define PTE_DEVMAP		(_AT(pteval_t, 1) << 57)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 94e147e5456c..ad9b221963d4 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -402,6 +402,22 @@  static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
 	return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
 }
 
+#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
+}
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+	return pte_val(pte) & PTE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
+}
+
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * See the comment in include/linux/pgtable.h
@@ -909,12 +925,13 @@  static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 /*
  * Encode and decode a swap entry:
  *	bits 0-1:	present (must be zero)
- *	bits 2-7:	swap type
+ *	bits 2:		remember PG_anon_exclusive
+ *	bits 3-7:	swap type
  *	bits 8-57:	swap offset
  *	bit  58:	PTE_PROT_NONE (must be zero)
  */
-#define __SWP_TYPE_SHIFT	2
-#define __SWP_TYPE_BITS		6
+#define __SWP_TYPE_SHIFT	3
+#define __SWP_TYPE_BITS		5
 #define __SWP_OFFSET_BITS	50
 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
 #define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)