diff mbox series

[v3,1/8] powerpc/uaccess: Add unsafe_copy_from_user

Message ID 20210109032557.13831-2-cmr@codefail.de (mailing list archive)
State Superseded
Headers show
Series Improve signal performance on PPC64 with KUAP | expand

Commit Message

Christopher M. Riedl Jan. 9, 2021, 3:25 a.m. UTC
Implement raw_copy_from_user_allowed() which assumes that userspace read
access is open. Use this new function to implement raw_copy_from_user().
Finally, wrap the new function to follow the usual "unsafe_" convention
of taking a label argument.

The new raw_copy_from_user_allowed() calls non-inline __copy_tofrom_user()
internally. This is still safe to call inside user access blocks formed
with user_*_access_begin()/user_*_access_end() since asm functions are not
instrumented for tracing.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/include/asm/uaccess.h | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

Comments

Christophe Leroy Jan. 11, 2021, 1:22 p.m. UTC | #1
Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
> Implement raw_copy_from_user_allowed() which assumes that userspace read
> access is open. Use this new function to implement raw_copy_from_user().
> Finally, wrap the new function to follow the usual "unsafe_" convention
> of taking a label argument.

I think there is no point implementing raw_copy_from_user_allowed(), see 
https://github.com/linuxppc/linux/commit/4b842e4e25b1 and 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/

You should simply do:

	#define unsafe_copy_from_user(d, s, l, e) \
		unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)


Christophe

> 
> The new raw_copy_from_user_allowed() calls non-inline __copy_tofrom_user()
> internally. This is still safe to call inside user access blocks formed
> with user_*_access_begin()/user_*_access_end() since asm functions are not
> instrumented for tracing.
> 
> Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> ---
>   arch/powerpc/include/asm/uaccess.h | 28 +++++++++++++++++++---------
>   1 file changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 501c9a79038c..698f3a6d6ae5 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -403,38 +403,45 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
>   }
>   #endif /* __powerpc64__ */
>   
> -static inline unsigned long raw_copy_from_user(void *to,
> -		const void __user *from, unsigned long n)
> +static inline unsigned long
> +raw_copy_from_user_allowed(void *to, const void __user *from, unsigned long n)
>   {
> -	unsigned long ret;
>   	if (__builtin_constant_p(n) && (n <= 8)) {
> -		ret = 1;
> +		unsigned long ret = 1;
>   
>   		switch (n) {
>   		case 1:
>   			barrier_nospec();
> -			__get_user_size(*(u8 *)to, from, 1, ret);
> +			__get_user_size_allowed(*(u8 *)to, from, 1, ret);
>   			break;
>   		case 2:
>   			barrier_nospec();
> -			__get_user_size(*(u16 *)to, from, 2, ret);
> +			__get_user_size_allowed(*(u16 *)to, from, 2, ret);
>   			break;
>   		case 4:
>   			barrier_nospec();
> -			__get_user_size(*(u32 *)to, from, 4, ret);
> +			__get_user_size_allowed(*(u32 *)to, from, 4, ret);
>   			break;
>   		case 8:
>   			barrier_nospec();
> -			__get_user_size(*(u64 *)to, from, 8, ret);
> +			__get_user_size_allowed(*(u64 *)to, from, 8, ret);
>   			break;
>   		}
>   		if (ret == 0)
>   			return 0;
>   	}
>   
> +	return __copy_tofrom_user((__force void __user *)to, from, n);
> +}
> +
> +static inline unsigned long
> +raw_copy_from_user(void *to, const void __user *from, unsigned long n)
> +{
> +	unsigned long ret;
> +
>   	barrier_nospec();
>   	allow_read_from_user(from, n);
> -	ret = __copy_tofrom_user((__force void __user *)to, from, n);
> +	ret = raw_copy_from_user_allowed(to, from, n);
>   	prevent_read_from_user(from, n);
>   	return ret;
>   }
> @@ -542,6 +549,9 @@ user_write_access_begin(const void __user *ptr, size_t len)
>   #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
>   #define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
>   
> +#define unsafe_copy_from_user(d, s, l, e) \
> +	unsafe_op_wrap(raw_copy_from_user_allowed(d, s, l), e)
> +
>   #define unsafe_copy_to_user(d, s, l, e) \
>   do {									\
>   	u8 __user *_dst = (u8 __user *)(d);				\
>
Christopher M. Riedl Jan. 17, 2021, 5:19 p.m. UTC | #2
On Mon Jan 11, 2021 at 7:22 AM CST, Christophe Leroy wrote:
>
>
> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
> > Implement raw_copy_from_user_allowed() which assumes that userspace read
> > access is open. Use this new function to implement raw_copy_from_user().
> > Finally, wrap the new function to follow the usual "unsafe_" convention
> > of taking a label argument.
>
> I think there is no point implementing raw_copy_from_user_allowed(), see
> https://github.com/linuxppc/linux/commit/4b842e4e25b1 and
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/
>
> You should simply do:
>
> #define unsafe_copy_from_user(d, s, l, e) \
> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
>

I gave this a try and the signal ops decreased by ~8K. Now, to be
honest, I am not sure what an "acceptable" benchmark number here
actually is - so maybe this is ok? Same loss with both radix and hash:

	|                                      | hash   | radix  |
	| ------------------------------------ | ------ | ------ |
	| linuxppc/next                        | 118693 | 133296 |
	| linuxppc/next w/o KUAP+KUEP          | 228911 | 228654 |
	| unsafe-signal64                      | 200480 | 234067 |
	| unsafe-signal64 (__copy_tofrom_user) | 192467 | 225119 |

To put this into perspective, prior to KUAP and uaccess flush, signal
performance in this benchmark was ~290K on hash.

>
> Christophe
>
> > 
> > The new raw_copy_from_user_allowed() calls non-inline __copy_tofrom_user()
> > internally. This is still safe to call inside user access blocks formed
> > with user_*_access_begin()/user_*_access_end() since asm functions are not
> > instrumented for tracing.
> > 
> > Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> > ---
> >   arch/powerpc/include/asm/uaccess.h | 28 +++++++++++++++++++---------
> >   1 file changed, 19 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> > index 501c9a79038c..698f3a6d6ae5 100644
> > --- a/arch/powerpc/include/asm/uaccess.h
> > +++ b/arch/powerpc/include/asm/uaccess.h
> > @@ -403,38 +403,45 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
> >   }
> >   #endif /* __powerpc64__ */
> >   
> > -static inline unsigned long raw_copy_from_user(void *to,
> > -		const void __user *from, unsigned long n)
> > +static inline unsigned long
> > +raw_copy_from_user_allowed(void *to, const void __user *from, unsigned long n)
> >   {
> > -	unsigned long ret;
> >   	if (__builtin_constant_p(n) && (n <= 8)) {
> > -		ret = 1;
> > +		unsigned long ret = 1;
> >   
> >   		switch (n) {
> >   		case 1:
> >   			barrier_nospec();
> > -			__get_user_size(*(u8 *)to, from, 1, ret);
> > +			__get_user_size_allowed(*(u8 *)to, from, 1, ret);
> >   			break;
> >   		case 2:
> >   			barrier_nospec();
> > -			__get_user_size(*(u16 *)to, from, 2, ret);
> > +			__get_user_size_allowed(*(u16 *)to, from, 2, ret);
> >   			break;
> >   		case 4:
> >   			barrier_nospec();
> > -			__get_user_size(*(u32 *)to, from, 4, ret);
> > +			__get_user_size_allowed(*(u32 *)to, from, 4, ret);
> >   			break;
> >   		case 8:
> >   			barrier_nospec();
> > -			__get_user_size(*(u64 *)to, from, 8, ret);
> > +			__get_user_size_allowed(*(u64 *)to, from, 8, ret);
> >   			break;
> >   		}
> >   		if (ret == 0)
> >   			return 0;
> >   	}
> >   
> > +	return __copy_tofrom_user((__force void __user *)to, from, n);
> > +}
> > +
> > +static inline unsigned long
> > +raw_copy_from_user(void *to, const void __user *from, unsigned long n)
> > +{
> > +	unsigned long ret;
> > +
> >   	barrier_nospec();
> >   	allow_read_from_user(from, n);
> > -	ret = __copy_tofrom_user((__force void __user *)to, from, n);
> > +	ret = raw_copy_from_user_allowed(to, from, n);
> >   	prevent_read_from_user(from, n);
> >   	return ret;
> >   }
> > @@ -542,6 +549,9 @@ user_write_access_begin(const void __user *ptr, size_t len)
> >   #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
> >   #define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
> >   
> > +#define unsafe_copy_from_user(d, s, l, e) \
> > +	unsafe_op_wrap(raw_copy_from_user_allowed(d, s, l), e)
> > +
> >   #define unsafe_copy_to_user(d, s, l, e) \
> >   do {									\
> >   	u8 __user *_dst = (u8 __user *)(d);				\
> >
Michael Ellerman Jan. 19, 2021, 2:11 a.m. UTC | #3
"Christopher M. Riedl" <cmr@codefail.de> writes:
> On Mon Jan 11, 2021 at 7:22 AM CST, Christophe Leroy wrote:
>> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
>> > Implement raw_copy_from_user_allowed() which assumes that userspace read
>> > access is open. Use this new function to implement raw_copy_from_user().
>> > Finally, wrap the new function to follow the usual "unsafe_" convention
>> > of taking a label argument.
>>
>> I think there is no point implementing raw_copy_from_user_allowed(), see
>> https://github.com/linuxppc/linux/commit/4b842e4e25b1 and
>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/
>>
>> You should simply do:
>>
>> #define unsafe_copy_from_user(d, s, l, e) \
>> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
>>
>
> I gave this a try and the signal ops decreased by ~8K. Now, to be
> honest, I am not sure what an "acceptable" benchmark number here
> actually is - so maybe this is ok? Same loss with both radix and hash:
>
> 	|                                      | hash   | radix  |
> 	| ------------------------------------ | ------ | ------ |
> 	| linuxppc/next                        | 118693 | 133296 |
> 	| linuxppc/next w/o KUAP+KUEP          | 228911 | 228654 |
> 	| unsafe-signal64                      | 200480 | 234067 |
> 	| unsafe-signal64 (__copy_tofrom_user) | 192467 | 225119 |
>
> To put this into perspective, prior to KUAP and uaccess flush, signal
> performance in this benchmark was ~290K on hash.

If I'm doing the math right 8K is ~4% of the best number.

It seems like 4% is worth a few lines of code to handle these constant
sizes. It's not like we have performance to throw away.

Or, we should chase down where the call sites are that are doing small
constant copies with copy_to/from_user() and change them to use
get/put_user().

cheers
Christophe Leroy Jan. 19, 2021, 7:29 a.m. UTC | #4
Le 17/01/2021 à 18:19, Christopher M. Riedl a écrit :
> On Mon Jan 11, 2021 at 7:22 AM CST, Christophe Leroy wrote:
>>
>>
>> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
>>> Implement raw_copy_from_user_allowed() which assumes that userspace read
>>> access is open. Use this new function to implement raw_copy_from_user().
>>> Finally, wrap the new function to follow the usual "unsafe_" convention
>>> of taking a label argument.
>>
>> I think there is no point implementing raw_copy_from_user_allowed(), see
>> https://github.com/linuxppc/linux/commit/4b842e4e25b1 and
>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/
>>
>> You should simply do:
>>
>> #define unsafe_copy_from_user(d, s, l, e) \
>> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
>>
> 
> I gave this a try and the signal ops decreased by ~8K. Now, to be
> honest, I am not sure what an "acceptable" benchmark number here
> actually is - so maybe this is ok? Same loss with both radix and hash:

I don't think this is ok, but it probably means that you are using unsafe_copy_from_user() to copy 
small constant size data that should be copied with unsafe_get_user() instead.

> 
> 	|                                      | hash   | radix  |
> 	| ------------------------------------ | ------ | ------ |
> 	| linuxppc/next                        | 118693 | 133296 |
> 	| linuxppc/next w/o KUAP+KUEP          | 228911 | 228654 |
> 	| unsafe-signal64                      | 200480 | 234067 |
> 	| unsafe-signal64 (__copy_tofrom_user) | 192467 | 225119 |
> 
> To put this into perspective, prior to KUAP and uaccess flush, signal
> performance in this benchmark was ~290K on hash.
> 
>>
>> Christophe
>>
>>>
>>> The new raw_copy_from_user_allowed() calls non-inline __copy_tofrom_user()
>>> internally. This is still safe to call inside user access blocks formed
>>> with user_*_access_begin()/user_*_access_end() since asm functions are not
>>> instrumented for tracing.
>>>
>>> Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
>>> ---
>>>    arch/powerpc/include/asm/uaccess.h | 28 +++++++++++++++++++---------
>>>    1 file changed, 19 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
>>> index 501c9a79038c..698f3a6d6ae5 100644
>>> --- a/arch/powerpc/include/asm/uaccess.h
>>> +++ b/arch/powerpc/include/asm/uaccess.h
>>> @@ -403,38 +403,45 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
>>>    }
>>>    #endif /* __powerpc64__ */
>>>    
>>> -static inline unsigned long raw_copy_from_user(void *to,
>>> -		const void __user *from, unsigned long n)
>>> +static inline unsigned long
>>> +raw_copy_from_user_allowed(void *to, const void __user *from, unsigned long n)
>>>    {
>>> -	unsigned long ret;
>>>    	if (__builtin_constant_p(n) && (n <= 8)) {
>>> -		ret = 1;
>>> +		unsigned long ret = 1;
>>>    
>>>    		switch (n) {
>>>    		case 1:
>>>    			barrier_nospec();
>>> -			__get_user_size(*(u8 *)to, from, 1, ret);
>>> +			__get_user_size_allowed(*(u8 *)to, from, 1, ret);
>>>    			break;
>>>    		case 2:
>>>    			barrier_nospec();
>>> -			__get_user_size(*(u16 *)to, from, 2, ret);
>>> +			__get_user_size_allowed(*(u16 *)to, from, 2, ret);
>>>    			break;
>>>    		case 4:
>>>    			barrier_nospec();
>>> -			__get_user_size(*(u32 *)to, from, 4, ret);
>>> +			__get_user_size_allowed(*(u32 *)to, from, 4, ret);
>>>    			break;
>>>    		case 8:
>>>    			barrier_nospec();
>>> -			__get_user_size(*(u64 *)to, from, 8, ret);
>>> +			__get_user_size_allowed(*(u64 *)to, from, 8, ret);
>>>    			break;
>>>    		}
>>>    		if (ret == 0)
>>>    			return 0;
>>>    	}
>>>    
>>> +	return __copy_tofrom_user((__force void __user *)to, from, n);
>>> +}
>>> +
>>> +static inline unsigned long
>>> +raw_copy_from_user(void *to, const void __user *from, unsigned long n)
>>> +{
>>> +	unsigned long ret;
>>> +
>>>    	barrier_nospec();
>>>    	allow_read_from_user(from, n);
>>> -	ret = __copy_tofrom_user((__force void __user *)to, from, n);
>>> +	ret = raw_copy_from_user_allowed(to, from, n);
>>>    	prevent_read_from_user(from, n);
>>>    	return ret;
>>>    }
>>> @@ -542,6 +549,9 @@ user_write_access_begin(const void __user *ptr, size_t len)
>>>    #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
>>>    #define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
>>>    
>>> +#define unsafe_copy_from_user(d, s, l, e) \
>>> +	unsafe_op_wrap(raw_copy_from_user_allowed(d, s, l), e)
>>> +
>>>    #define unsafe_copy_to_user(d, s, l, e) \
>>>    do {									\
>>>    	u8 __user *_dst = (u8 __user *)(d);				\
>>>
Christophe Leroy Jan. 19, 2021, 12:33 p.m. UTC | #5
Le 19/01/2021 à 03:11, Michael Ellerman a écrit :
> "Christopher M. Riedl" <cmr@codefail.de> writes:
>> On Mon Jan 11, 2021 at 7:22 AM CST, Christophe Leroy wrote:
>>> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
>>>> Implement raw_copy_from_user_allowed() which assumes that userspace read
>>>> access is open. Use this new function to implement raw_copy_from_user().
>>>> Finally, wrap the new function to follow the usual "unsafe_" convention
>>>> of taking a label argument.
>>>
>>> I think there is no point implementing raw_copy_from_user_allowed(), see
>>> https://github.com/linuxppc/linux/commit/4b842e4e25b1 and
>>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/
>>>
>>> You should simply do:
>>>
>>> #define unsafe_copy_from_user(d, s, l, e) \
>>> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
>>>
>>
>> I gave this a try and the signal ops decreased by ~8K. Now, to be
>> honest, I am not sure what an "acceptable" benchmark number here
>> actually is - so maybe this is ok? Same loss with both radix and hash:
>>
>> 	|                                      | hash   | radix  |
>> 	| ------------------------------------ | ------ | ------ |
>> 	| linuxppc/next                        | 118693 | 133296 |
>> 	| linuxppc/next w/o KUAP+KUEP          | 228911 | 228654 |
>> 	| unsafe-signal64                      | 200480 | 234067 |
>> 	| unsafe-signal64 (__copy_tofrom_user) | 192467 | 225119 |
>>
>> To put this into perspective, prior to KUAP and uaccess flush, signal
>> performance in this benchmark was ~290K on hash.
> 
> If I'm doing the math right 8K is ~4% of the best number.
> 
> It seems like 4% is worth a few lines of code to handle these constant
> sizes. It's not like we have performance to throw away.
> 
> Or, we should chase down where the call sites are that are doing small
> constant copies with copy_to/from_user() and change them to use
> get/put_user().
> 

Christopher, when you say you gave it a try, is it my series or only the following?

#define unsafe_copy_from_user(d, s, l, e) \
unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)

Because I see no use of unsafe_copy_from_user() that would explain that.

Christophe
Christopher M. Riedl Jan. 19, 2021, 5:02 p.m. UTC | #6
On Tue Jan 19, 2021 at 6:33 AM CST, Christophe Leroy wrote:
>
>
> Le 19/01/2021 à 03:11, Michael Ellerman a écrit :
> > "Christopher M. Riedl" <cmr@codefail.de> writes:
> >> On Mon Jan 11, 2021 at 7:22 AM CST, Christophe Leroy wrote:
> >>> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
> >>>> Implement raw_copy_from_user_allowed() which assumes that userspace read
> >>>> access is open. Use this new function to implement raw_copy_from_user().
> >>>> Finally, wrap the new function to follow the usual "unsafe_" convention
> >>>> of taking a label argument.
> >>>
> >>> I think there is no point implementing raw_copy_from_user_allowed(), see
> >>> https://github.com/linuxppc/linux/commit/4b842e4e25b1 and
> >>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/
> >>>
> >>> You should simply do:
> >>>
> >>> #define unsafe_copy_from_user(d, s, l, e) \
> >>> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
> >>>
> >>
> >> I gave this a try and the signal ops decreased by ~8K. Now, to be
> >> honest, I am not sure what an "acceptable" benchmark number here
> >> actually is - so maybe this is ok? Same loss with both radix and hash:
> >>
> >> 	|                                      | hash   | radix  |
> >> 	| ------------------------------------ | ------ | ------ |
> >> 	| linuxppc/next                        | 118693 | 133296 |
> >> 	| linuxppc/next w/o KUAP+KUEP          | 228911 | 228654 |
> >> 	| unsafe-signal64                      | 200480 | 234067 |
> >> 	| unsafe-signal64 (__copy_tofrom_user) | 192467 | 225119 |
> >>
> >> To put this into perspective, prior to KUAP and uaccess flush, signal
> >> performance in this benchmark was ~290K on hash.
> > 
> > If I'm doing the math right 8K is ~4% of the best number.
> > 
> > It seems like 4% is worth a few lines of code to handle these constant
> > sizes. It's not like we have performance to throw away.
> > 
> > Or, we should chase down where the call sites are that are doing small
> > constant copies with copy_to/from_user() and change them to use
> > get/put_user().
> > 
>
> Christopher, when you say you gave it a try, is I my series or only the
> following ?
>
> #define unsafe_copy_from_user(d, s, l, e) \
> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
>

I only used the above to replace this patch in my series (so none of my
changes implementing raw_copy_from_user_allowed() are included).

>
> Because I see no use of unsafe_copy_from_user() that would explain that.
>
> Christophe
Christophe Leroy Jan. 19, 2021, 5:27 p.m. UTC | #7
Le 19/01/2021 à 18:02, Christopher M. Riedl a écrit :
> On Tue Jan 19, 2021 at 6:33 AM CST, Christophe Leroy wrote:
>>
>>
>> Le 19/01/2021 à 03:11, Michael Ellerman a écrit :
>>> "Christopher M. Riedl" <cmr@codefail.de> writes:
>>>> On Mon Jan 11, 2021 at 7:22 AM CST, Christophe Leroy wrote:
>>>>> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
>>>>>> Implement raw_copy_from_user_allowed() which assumes that userspace read
>>>>>> access is open. Use this new function to implement raw_copy_from_user().
>>>>>> Finally, wrap the new function to follow the usual "unsafe_" convention
>>>>>> of taking a label argument.
>>>>>
>>>>> I think there is no point implementing raw_copy_from_user_allowed(), see
>>>>> https://github.com/linuxppc/linux/commit/4b842e4e25b1 and
>>>>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/
>>>>>
>>>>> You should simply do:
>>>>>
>>>>> #define unsafe_copy_from_user(d, s, l, e) \
>>>>> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
>>>>>
>>>>
>>>> I gave this a try and the signal ops decreased by ~8K. Now, to be
>>>> honest, I am not sure what an "acceptable" benchmark number here
>>>> actually is - so maybe this is ok? Same loss with both radix and hash:
>>>>
>>>> 	|                                      | hash   | radix  |
>>>> 	| ------------------------------------ | ------ | ------ |
>>>> 	| linuxppc/next                        | 118693 | 133296 |
>>>> 	| linuxppc/next w/o KUAP+KUEP          | 228911 | 228654 |
>>>> 	| unsafe-signal64                      | 200480 | 234067 |
>>>> 	| unsafe-signal64 (__copy_tofrom_user) | 192467 | 225119 |
>>>>
>>>> To put this into perspective, prior to KUAP and uaccess flush, signal
>>>> performance in this benchmark was ~290K on hash.
>>>
>>> If I'm doing the math right 8K is ~4% of the best number.
>>>
>>> It seems like 4% is worth a few lines of code to handle these constant
>>> sizes. It's not like we have performance to throw away.
>>>
>>> Or, we should chase down where the call sites are that are doing small
>>> constant copies with copy_to/from_user() and change them to use
>>> get/put_user().
>>>
>>
>> Christopher, when you say you gave it a try, is I my series or only the
>> following ?
>>
>> #define unsafe_copy_from_user(d, s, l, e) \
>> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
>>
> 
> I only used the above to replace this patch in my series (so none of my
> changes implementing raw_copy_from_user_allowed() are included).

Then I see no reason why the performance would be different, because you only call 
unsafe_copy_from_user() with non-trivial lengths.

> 
>>
>> Because I see no use of unsafe_copy_from_user() that would explain that.
>>
>> Christophe
Christopher M. Riedl Jan. 20, 2021, 5:08 a.m. UTC | #8
On Tue Jan 19, 2021 at 11:27 AM CST, Christophe Leroy wrote:
>
>
> Le 19/01/2021 à 18:02, Christopher M. Riedl a écrit :
> > On Tue Jan 19, 2021 at 6:33 AM CST, Christophe Leroy wrote:
> >>
> >>
> >> Le 19/01/2021 à 03:11, Michael Ellerman a écrit :
> >>> "Christopher M. Riedl" <cmr@codefail.de> writes:
> >>>> On Mon Jan 11, 2021 at 7:22 AM CST, Christophe Leroy wrote:
> >>>>> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
> >>>>>> Implement raw_copy_from_user_allowed() which assumes that userspace read
> >>>>>> access is open. Use this new function to implement raw_copy_from_user().
> >>>>>> Finally, wrap the new function to follow the usual "unsafe_" convention
> >>>>>> of taking a label argument.
> >>>>>
> >>>>> I think there is no point implementing raw_copy_from_user_allowed(), see
> >>>>> https://github.com/linuxppc/linux/commit/4b842e4e25b1 and
> >>>>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/
> >>>>>
> >>>>> You should simply do:
> >>>>>
> >>>>> #define unsafe_copy_from_user(d, s, l, e) \
> >>>>> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
> >>>>>
> >>>>
> >>>> I gave this a try and the signal ops decreased by ~8K. Now, to be
> >>>> honest, I am not sure what an "acceptable" benchmark number here
> >>>> actually is - so maybe this is ok? Same loss with both radix and hash:
> >>>>
> >>>> 	|                                      | hash   | radix  |
> >>>> 	| ------------------------------------ | ------ | ------ |
> >>>> 	| linuxppc/next                        | 118693 | 133296 |
> >>>> 	| linuxppc/next w/o KUAP+KUEP          | 228911 | 228654 |
> >>>> 	| unsafe-signal64                      | 200480 | 234067 |
> >>>> 	| unsafe-signal64 (__copy_tofrom_user) | 192467 | 225119 |
> >>>>
> >>>> To put this into perspective, prior to KUAP and uaccess flush, signal
> >>>> performance in this benchmark was ~290K on hash.
> >>>
> >>> If I'm doing the math right 8K is ~4% of the best number.
> >>>
> >>> It seems like 4% is worth a few lines of code to handle these constant
> >>> sizes. It's not like we have performance to throw away.
> >>>
> >>> Or, we should chase down where the call sites are that are doing small
> >>> constant copies with copy_to/from_user() and change them to use
> >>> get/put_user().
> >>>
> >>
> >> Christopher, when you say you gave it a try, is I my series or only the
> >> following ?
> >>
> >> #define unsafe_copy_from_user(d, s, l, e) \
> >> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
> >>
> > 
> > I only used the above to replace this patch in my series (so none of my
> > changes implementing raw_copy_from_user_allowed() are included).
>
> Then I see no reason why the performance would be different, because you
> only call
> unsafe_copy_from_user() with non trivial lengthes.
>

Ok I made a mistake - this actually included the other improvements from
your feedback on this series; specifically moving the unsafe_get_user()
call to read the msr regs value into the #ifdef. My first pass resulted
in another __get_user() call which explains some of the perf loss.

With that mistake fixed, the benchmark performance is still only ~197k
on hash (consistent). I agree that there are no places where
unsafe_copy_from_user() is called with trivial lengths so the only
conclusion I can draw is that the changes in this patch marginally
speed-up the other __copy_from_user() calls. I started comparing the
disassembly but nothing immediately obvious stands out to me. In fact, I
observe the speed-up even if I keep this patch _and_ apply this change:

#define unsafe_copy_from_user(d, s, l, e) \
unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)

Related to that, I think sigset_t is always 8B on ppc64 so these will
probably need a wrapper if we pick-up your series to get rid of trivial
size optimizations:

__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))

I can bring performance up to ~200K on hash again by replacing the two
__copy_from_user(&set, ...) with direct calls to __get_user_size() (it's
kind of hacky I think). See the last commit in this tree:

https://git.sr.ht/~cmr/linux/log/unsafe-signal64-v4

All my comments apply to performance on radix as well.

> > 
> >>
> >> Because I see no use of unsafe_copy_from_user() that would explain that.
> >>
> >> Christophe
Christophe Leroy Feb. 9, 2021, 2:09 p.m. UTC | #9
Le 19/01/2021 à 03:11, Michael Ellerman a écrit :
> "Christopher M. Riedl" <cmr@codefail.de> writes:
>> On Mon Jan 11, 2021 at 7:22 AM CST, Christophe Leroy wrote:
>>> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
>>>> Implement raw_copy_from_user_allowed() which assumes that userspace read
>>>> access is open. Use this new function to implement raw_copy_from_user().
>>>> Finally, wrap the new function to follow the usual "unsafe_" convention
>>>> of taking a label argument.
>>>
>>> I think there is no point implementing raw_copy_from_user_allowed(), see
>>> https://github.com/linuxppc/linux/commit/4b842e4e25b1 and
>>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/
>>>
>>> You should simply do:
>>>
>>> #define unsafe_copy_from_user(d, s, l, e) \
>>> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
>>>
>>
>> I gave this a try and the signal ops decreased by ~8K. Now, to be
>> honest, I am not sure what an "acceptable" benchmark number here
>> actually is - so maybe this is ok? Same loss with both radix and hash:
>>
>> 	|                                      | hash   | radix  |
>> 	| ------------------------------------ | ------ | ------ |
>> 	| linuxppc/next                        | 118693 | 133296 |
>> 	| linuxppc/next w/o KUAP+KUEP          | 228911 | 228654 |
>> 	| unsafe-signal64                      | 200480 | 234067 |
>> 	| unsafe-signal64 (__copy_tofrom_user) | 192467 | 225119 |
>>
>> To put this into perspective, prior to KUAP and uaccess flush, signal
>> performance in this benchmark was ~290K on hash.
> 
> If I'm doing the math right 8K is ~4% of the best number.
> 
> It seems like 4% is worth a few lines of code to handle these constant
> sizes. It's not like we have performance to throw away.
> 
> Or, we should chase down where the call sites are that are doing small
> constant copies with copy_to/from_user() and change them to use
> get/put_user().
> 

I have built pmac32_defconfig and ppc64_defconfig with a BUILD_BUG_ON(__builtin_constant_p(n) && (n 
== 1 || n == 2 || n == 4 || n == 8) in raw_copy_from_user() and raw_copy_to_user():

On pmac32_defconfig, no hit.

On ppc64_defconfig, two hits:
- copies of sigset_t in signal64. This problem is only on linux/next. On next-test we don't have 
this problem anymore thanks to the series from Christopher.
- in pkey_set() in arch/powerpc/kernel/ptrace/ptrace-view.c, in the copy of new_amr. This is not a 
hot path I think so we can live with it.

Christophe
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 501c9a79038c..698f3a6d6ae5 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -403,38 +403,45 @@  raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
 }
 #endif /* __powerpc64__ */
 
-static inline unsigned long raw_copy_from_user(void *to,
-		const void __user *from, unsigned long n)
+static inline unsigned long
+raw_copy_from_user_allowed(void *to, const void __user *from, unsigned long n)
 {
-	unsigned long ret;
 	if (__builtin_constant_p(n) && (n <= 8)) {
-		ret = 1;
+		unsigned long ret = 1;
 
 		switch (n) {
 		case 1:
 			barrier_nospec();
-			__get_user_size(*(u8 *)to, from, 1, ret);
+			__get_user_size_allowed(*(u8 *)to, from, 1, ret);
 			break;
 		case 2:
 			barrier_nospec();
-			__get_user_size(*(u16 *)to, from, 2, ret);
+			__get_user_size_allowed(*(u16 *)to, from, 2, ret);
 			break;
 		case 4:
 			barrier_nospec();
-			__get_user_size(*(u32 *)to, from, 4, ret);
+			__get_user_size_allowed(*(u32 *)to, from, 4, ret);
 			break;
 		case 8:
 			barrier_nospec();
-			__get_user_size(*(u64 *)to, from, 8, ret);
+			__get_user_size_allowed(*(u64 *)to, from, 8, ret);
 			break;
 		}
 		if (ret == 0)
 			return 0;
 	}
 
+	return __copy_tofrom_user((__force void __user *)to, from, n);
+}
+
+static inline unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long ret;
+
 	barrier_nospec();
 	allow_read_from_user(from, n);
-	ret = __copy_tofrom_user((__force void __user *)to, from, n);
+	ret = raw_copy_from_user_allowed(to, from, n);
 	prevent_read_from_user(from, n);
 	return ret;
 }
@@ -542,6 +549,9 @@  user_write_access_begin(const void __user *ptr, size_t len)
 #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
 #define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
 
+#define unsafe_copy_from_user(d, s, l, e) \
+	unsafe_op_wrap(raw_copy_from_user_allowed(d, s, l), e)
+
 #define unsafe_copy_to_user(d, s, l, e) \
 do {									\
 	u8 __user *_dst = (u8 __user *)(d);				\