diff mbox series

[1/6] powerpc/code-patching: Implement generic text patching function

Message ID 20220916062330.430468-2-bgray@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series Out-of-line static calls for powerpc64 ELF V2 | expand

Commit Message

Benjamin Gray Sept. 16, 2022, 6:23 a.m. UTC
Adds a generic text patching mechanism for patches of 1, 2, 4, or 8
bytes. The patcher conditionally syncs the icache depending on if
the content will be executed (as opposed to, e.g., read-only data).

The `patch_instruction` function is reimplemented in terms of this
more generic function. This generic implementation allows patching of
arbitrary 64-bit data, whereas the original `patch_instruction` decided
the size based on the 'instruction' opcode, so was not suitable for
arbitrary data.

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
---
 arch/powerpc/include/asm/code-patching.h |   1 +
 arch/powerpc/lib/code-patching.c         | 135 +++++++++++++++--------
 2 files changed, 89 insertions(+), 47 deletions(-)

Comments

kernel test robot Sept. 16, 2022, 9:39 a.m. UTC | #1
Hi Benjamin,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on next-20220916]
[cannot apply to linus/master v6.0-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Benjamin-Gray/Out-of-line-static-calls-for-powerpc64-ELF-V2/20220916-142951
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allnoconfig
compiler: powerpc-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/31a9d4d694a3a20129f20390f3d7af2c154c8ed1
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Benjamin-Gray/Out-of-line-static-calls-for-powerpc64-ELF-V2/20220916-142951
        git checkout 31a9d4d694a3a20129f20390f3d7af2c154c8ed1
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/lib/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> arch/powerpc/lib/code-patching.c:235:7: error: no previous prototype for 'patch_memory' [-Werror=missing-prototypes]
     235 | void *patch_memory(void *dest, const void *src, size_t size)
         |       ^~~~~~~~~~~~
   arch/powerpc/lib/code-patching.c: In function 'patch_instruction':
   arch/powerpc/lib/code-patching.c:248:24: error: implicit declaration of function 'patch_text'; did you mean 'path_get'? [-Werror=implicit-function-declaration]
     248 |                 return patch_text(addr, &val, sizeof(val), true);
         |                        ^~~~~~~~~~
         |                        path_get
   arch/powerpc/lib/code-patching.c: At top level:
>> arch/powerpc/lib/code-patching.c:230:12: error: 'do_patch_instruction' defined but not used [-Werror=unused-function]
     230 | static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
         |            ^~~~~~~~~~~~~~~~~~~~
   cc1: all warnings being treated as errors


vim +/patch_memory +235 arch/powerpc/lib/code-patching.c

   229	
 > 230	static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
   231	{
   232		return raw_patch_instruction(addr, instr);
   233	}
   234	
 > 235	void *patch_memory(void *dest, const void *src, size_t size)
   236	{
   237		return memcpy(dest, src, size);
   238	}
   239
kernel test robot Sept. 16, 2022, 11:16 a.m. UTC | #2
Hi Benjamin,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on powerpc/next]
[also build test WARNING on next-20220916]
[cannot apply to linus/master v6.0-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Benjamin-Gray/Out-of-line-static-calls-for-powerpc64-ELF-V2/20220916-142951
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allyesconfig (https://download.01.org/0day-ci/archive/20220916/202209161957.R4ivN9mo-lkp@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/31a9d4d694a3a20129f20390f3d7af2c154c8ed1
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Benjamin-Gray/Out-of-line-static-calls-for-powerpc64-ELF-V2/20220916-142951
        git checkout 31a9d4d694a3a20129f20390f3d7af2c154c8ed1
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/lib/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> arch/powerpc/lib/code-patching.c:235:7: warning: no previous prototype for 'patch_memory' [-Wmissing-prototypes]
     235 | void *patch_memory(void *dest, const void *src, size_t size)
         |       ^~~~~~~~~~~~
   arch/powerpc/lib/code-patching.c: In function 'patch_instruction':
   arch/powerpc/lib/code-patching.c:248:24: error: implicit declaration of function 'patch_text'; did you mean 'path_get'? [-Werror=implicit-function-declaration]
     248 |                 return patch_text(addr, &val, sizeof(val), true);
         |                        ^~~~~~~~~~
         |                        path_get
   arch/powerpc/lib/code-patching.c: At top level:
   arch/powerpc/lib/code-patching.c:230:12: warning: 'do_patch_instruction' defined but not used [-Wunused-function]
     230 | static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
         |            ^~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +/patch_memory +235 arch/powerpc/lib/code-patching.c

   234	
 > 235	void *patch_memory(void *dest, const void *src, size_t size)
   236	{
   237		return memcpy(dest, src, size);
   238	}
   239
Christophe Leroy Sept. 19, 2022, 6:04 a.m. UTC | #3
Le 16/09/2022 à 08:23, Benjamin Gray a écrit :
> Adds a generic text patching mechanism for patches of 1, 2, 4, or 8
> bytes. The patcher conditionally syncs the icache depending on if
> the content will be executed (as opposed to, e.g., read-only data).
> 
> The `patch_instruction` function is reimplemented in terms of this
> more generic function. This generic implementation allows patching of
> arbitrary 64-bit data, whereas the original `patch_instruction` decided
> the size based on the 'instruction' opcode, so was not suitable for
> arbitrary data.

With CONFIG_STRICT_KERNEL_RWX, this patch causes a 15% time increase 
for activation/deactivation of ftrace.

Without CONFIG_STRICT_KERNEL_RWX, it doesn't build.

> 
> Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/code-patching.h |   1 +
>   arch/powerpc/lib/code-patching.c         | 135 +++++++++++++++--------
>   2 files changed, 89 insertions(+), 47 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
> index 1c6316ec4b74..6a52c19dae46 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -76,6 +76,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>   int patch_branch(u32 *addr, unsigned long target, int flags);
>   int patch_instruction(u32 *addr, ppc_inst_t instr);
>   int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
> +int patch_text_data(void *dest, const void *src, size_t size);
>   
>   static inline unsigned long patch_site_addr(s32 *site)
>   {
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index ad0cf3108dd0..a09a0898c2ce 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -3,6 +3,7 @@
>    *  Copyright 2008 Michael Ellerman, IBM Corporation.
>    */
>   
> +#include <linux/mm.h>
>   #include <linux/kprobes.h>
>   #include <linux/vmalloc.h>
>   #include <linux/init.h>
> @@ -14,32 +15,7 @@
>   #include <asm/page.h>
>   #include <asm/code-patching.h>
>   #include <asm/inst.h>
> -
> -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
> -{
> -	if (!ppc_inst_prefixed(instr)) {
> -		u32 val = ppc_inst_val(instr);
> -
> -		__put_kernel_nofault(patch_addr, &val, u32, failed);
> -	} else {
> -		u64 val = ppc_inst_as_ulong(instr);
> -
> -		__put_kernel_nofault(patch_addr, &val, u64, failed);
> -	}
> -
> -	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
> -							    "r" (exec_addr));
> -
> -	return 0;
> -
> -failed:
> -	return -EPERM;
> -}
> -
> -int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
> -{
> -	return __patch_instruction(addr, instr, addr);
> -}
> +#include <asm/cacheflush.h>
>   
>   #ifdef CONFIG_STRICT_KERNEL_RWX
>   static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
> @@ -147,16 +123,44 @@ static void unmap_patch_area(unsigned long addr)
>   	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>   }
>   
> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +static int __patch_text(void *dest, const void *src, size_t size, bool is_exec, void *exec_addr)

Is 'text' a good name ? For me text mean executable code. Should it be 
__patch_memory() ?


Why pass src as a void * ? This forces data to go via the stack. Can't 
you pass it as a 'long' ?

> +{
> +	if (virt_to_pfn(dest) != virt_to_pfn(dest + size - 1))
> +		return -EFAULT;

Why do you need that new check ?

> +
> +	switch (size) {
> +		case 1:
> +			__put_kernel_nofault(dest, src, u8, failed);
> +			break;
> +		case 2:
> +			__put_kernel_nofault(dest, src, u16, failed);
> +			break;
> +		case 4:
> +			__put_kernel_nofault(dest, src, u32, failed);
> +			break;
> +		case 8:
> +			__put_kernel_nofault(dest, src, u64, failed);
> +			break;

Is case 8 needed for PPC32 ?

> +	}

Do you catch it when size is none of 1, 2, 4, 8?

> +
> +	asm ("dcbst 0, %0; sync" :: "r" (dest));

Maybe write it in C:

	dcbst(dest);
	mb(); /* sync */

> +
> +	if (is_exec)
> +		asm ("icbi 0,%0; sync; isync" :: "r" (exec_addr));

Same, can be:

	if (is_exec) {
		icbi(exec_addr);
		mb(); /* sync */
		isync();
	}

Or keep it flat:

	if (!is_exec)
		return 0;

	icbi(exec_addr);
	mb(); /* sync */
	isync();

	return 0;

> +
> +	return 0;
> +
> +failed:
> +	return -EPERM;
> +}
> +
> +static pte_t *start_text_patch(void* dest, u32 **patch_addr)
>   {
> -	int err;
> -	u32 *patch_addr;
> -	unsigned long text_poke_addr;
>   	pte_t *pte;
> -	unsigned long pfn = get_patch_pfn(addr);
> +	unsigned long text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
> +	unsigned long pfn = get_patch_pfn(dest);
>   
> -	text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
> -	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
> +	*patch_addr = (u32 *)(text_poke_addr + offset_in_page(dest));
>   
>   	pte = virt_to_kpte(text_poke_addr);
>   	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
> @@ -164,33 +168,63 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
>   	if (radix_enabled())
>   		asm volatile("ptesync": : :"memory");
>   
> -	err = __patch_instruction(addr, instr, patch_addr);
> +	return pte;
> +}
>   
> +static void finish_text_patch(pte_t *pte)
> +{
> +	unsigned long text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;

Leave a blank line after variables declaration.

>   	pte_clear(&init_mm, text_poke_addr, pte);
>   	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> +}
> +
> +static int do_patch_text(void *dest, const void *src, size_t size, bool is_exec)
> +{
> +	int err;
> +	pte_t *pte;
> +	u32 *patch_addr;
> +
> +	pte = start_text_patch(dest, &patch_addr);
> +	err = __patch_text(patch_addr, src, size, is_exec, dest);
> +	finish_text_patch(pte);

Why do you need to split this function in three parts ? I can't see the 
added value, all it does is reduce readability.

Did you check the impact of calling __this_cpu_read() twice ?

>   
>   	return err;
>   }
>   
> -static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +static int patch_text(void *dest, const void *src, size_t size, bool is_exec)

Same, do you need the source data to go via stack ?

>   {
>   	int err;
>   	unsigned long flags;
>   
> -	/*
> -	 * During early early boot patch_instruction is called
> -	 * when text_poke_area is not ready, but we still need
> -	 * to allow patching. We just do the plain old patching
> -	 */
> +	/* Make sure we aren't patching a freed init section */
> +	if (static_branch_likely(&init_mem_is_free) && init_section_contains(dest, 4))
> +		return 0;
> +
>   	if (!static_branch_likely(&poking_init_done))
> -		return raw_patch_instruction(addr, instr);
> +		return __patch_text(dest, src, size, is_exec, dest);
>   
>   	local_irq_save(flags);
> -	err = __do_patch_instruction(addr, instr);
> +	err = do_patch_text(dest, src, size, is_exec);
>   	local_irq_restore(flags);
>   
>   	return err;
>   }
> +
> +int patch_text_data(void *dest, const void *src, size_t size) {

{ must be on next line for functions start.

> +	return patch_text(dest, src, size, false);
> +}
> +
> +int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
> +{
> +	if (!ppc_inst_prefixed(instr)) {
> +		u32 val = ppc_inst_val(instr);
> +		return __patch_text(addr, &val, sizeof(val), true, addr);
> +	} else {
> +		u64 val = ppc_inst_as_ulong(instr);
> +		return __patch_text(addr, &val, sizeof(val), true, addr);
> +	}
> +}
> +
>   #else /* !CONFIG_STRICT_KERNEL_RWX */
>   
>   static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
> @@ -198,17 +232,24 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
>   	return raw_patch_instruction(addr, instr);
>   }
>   
> +void *patch_memory(void *dest, const void *src, size_t size)

What is this function used for ?

> +{
> +	return memcpy(dest, src, size);
> +}
> +
>   #endif /* CONFIG_STRICT_KERNEL_RWX */
>   
>   __ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free);
>   
>   int patch_instruction(u32 *addr, ppc_inst_t instr)
>   {
> -	/* Make sure we aren't patching a freed init section */
> -	if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4))
> -		return 0;
> -
> -	return do_patch_instruction(addr, instr);
> +	if (!ppc_inst_prefixed(instr)) {
> +		u32 val = ppc_inst_val(instr);
> +		return patch_text(addr, &val, sizeof(val), true);
> +	} else {
> +		u64 val = ppc_inst_as_ulong(instr);
> +		return patch_text(addr, &val, sizeof(val), true);
> +	}
>   }
>   NOKPROBE_SYMBOL(patch_instruction);
>
Christophe Leroy Sept. 19, 2022, 6:38 a.m. UTC | #4
Le 16/09/2022 à 08:23, Benjamin Gray a écrit :
>   
> -static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +static int patch_text(void *dest, const void *src, size_t size, bool is_exec)
>   {
>   	int err;
>   	unsigned long flags;
>   
> -	/*
> -	 * During early early boot patch_instruction is called
> -	 * when text_poke_area is not ready, but we still need
> -	 * to allow patching. We just do the plain old patching
> -	 */
> +	/* Make sure we aren't patching a freed init section */
> +	if (static_branch_likely(&init_mem_is_free) && init_section_contains(dest, 4))
> +		return 0;
> +
>   	if (!static_branch_likely(&poking_init_done))
> -		return raw_patch_instruction(addr, instr);
> +		return __patch_text(dest, src, size, is_exec, dest);
>   

Test ordering looks odd. How can init_mem_is_free be true and 
poking_init_done be false ?

>   	local_irq_save(flags);
> -	err = __do_patch_instruction(addr, instr);
> +	err = do_patch_text(dest, src, size, is_exec);
>   	local_irq_restore(flags);
>   
>   	return err;
>   }
Benjamin Gray Sept. 19, 2022, 6:49 a.m. UTC | #5
On Mon, 2022-09-19 at 06:04 +0000, Christophe Leroy wrote:
> With CONFIG_STRICT_KERNEL_RWX, this patches causes a 15% time
> increase 
> for activation/deactivation of ftrace.

It's possible that the new alignment check is the cause. I'll see


> Without CONFIG_STRICT_KERNEL_RWX, it doesn't build.

Yup, fixed for v2

> > +static int __patch_text(void *dest, const void *src, size_t size,
> > bool is_exec, void *exec_addr)
> 
> Is 'text' a good name ? For me text mean executable code. Should it
> be 
> __patch_memory() ?

Well patching regular memory is just a normal store. Text to me implies
it's non-writeable. Though __patch_memory would be fine.

> Why pass src as a void * ? This forces data to go via the stack.
> Can't 
> you pass it as a 'long' ?

Probably, I wasn't aware that it would make a difference. I prefer
pointers in general for their semantic meaning, but will change if it
affects param passing.

> > +       if (virt_to_pfn(dest) != virt_to_pfn(dest + size - 1))
> > +               return -EFAULT;
> 
> Why do you need that new check ?

If the patch crosses a page boundary then letting it happen is
unpredictable. Though perhaps this requirement can just be put as a
comment, or require that patches be aligned to the patch size.

> > +               case 8:
> > +                       __put_kernel_nofault(dest, src, u64,
> > failed);
> > +                       break;
> 
> Is case 8 needed for PPC32 ?

I don't have a particular need for it, but the underlying
__put_kernel_nofault is capable of it so I included it.

> > +       }
> 
> Do you catch it when size if none of 1,2,4,8 ?
> 

Not yet. Perhaps I should wrap patch_text_data in a macro that checks
the size with BUILD_BUG_ON? I'd rather not check at runtime.

> > +
> > +       asm ("dcbst 0, %0; sync" :: "r" (dest));
> 
> Maybe write it in C:
> 
>         dcbst(dest);
>         mb(); /* sync */
> 
> > +
> > +       if (is_exec)
> > +               asm ("icbi 0,%0; sync; isync" :: "r" (exec_addr));
> 
> Same, can be:
> 
>         if (is_exec) {
>                 icbi(exec_addr);
>                 mb(); /* sync */
>                 isync();
>         }
> 
> Or keep it flat:
> 
>         if (!is_exec)
>                 return 0;
> 
>         icbi(exec_addr);
>         mb(); /* sync */
>         isync();
> 
>         return 0;

Will try this.

> > +static int do_patch_text(void *dest, const void *src, size_t size,
> > bool is_exec)
> > +{
> > +       int err;
> > +       pte_t *pte;
> > +       u32 *patch_addr;
> > +
> > +       pte = start_text_patch(dest, &patch_addr);
> > +       err = __patch_text(patch_addr, src, size, is_exec, dest);
> > +       finish_text_patch(pte);
> 
> Why do you need to split this function in three parts ? I can't see
> the 
> added value, all it does is reduce readability.

It made it more readable to me, so the __patch_text didn't get buried.
It also made it easier to do the refactoring, and potentially add code
patching variants that use the poke area but not __patch_text. I'll
remove it for v2 though given this is the only use right now.

> Did you check the impact of calling __this_cpu_read() twice ?

I wasn't concerned about performance, but given I'll merge it back
again it will only be read once in v2 again.

> > +void *patch_memory(void *dest, const void *src, size_t size)
> 
> What is this function used for ?
> 

Build failure apparently :)

It's removed in v2.
>
Christophe Leroy Sept. 19, 2022, 7:16 a.m. UTC | #6
Le 19/09/2022 à 08:49, Benjamin Gray a écrit :
> On Mon, 2022-09-19 at 06:04 +0000, Christophe Leroy wrote:
>> With CONFIG_STRICT_KERNEL_RWX, this patches causes a 15% time
>> increase
>> for activation/deactivation of ftrace.
> 
> It's possible that new alignment check is the cause. I'll see
> 
> 
>> Without CONFIG_STRICT_KERNEL_RWX, it doesn't build.
> 
> Yup, fixed for v2
> 
>>> +static int __patch_text(void *dest, const void *src, size_t size,
>>> bool is_exec, void *exec_addr)
>>
>> Is 'text' a good name ? For me text mean executable code. Should it
>> be
>> __patch_memory() ?
> 
> Well patching regular memory is just a normal store. Text to me implies
> its non-writeable. Though __patch_memory would be fine.
> 
>> Why pass src as a void * ? This forces data to go via the stack.
>> Can't
>> you pass it as a 'long' ?
> 
> Probably, I wasn't aware that it would make a difference. I prefer
> pointers in general for their semantic meaning, but will change if it
> affects param passing.
> 
>>> +       if (virt_to_pfn(dest) != virt_to_pfn(dest + size - 1))
>>> +               return -EFAULT;
>>
>> Why do you need that new check ?
> 
> If the patch crosses a page boundary then letting it happen is
> unpredictable. Though perhaps this requirement can just be put as a
> comment, or require that patches be aligned to the patch size.

Why would it be unpredictable ? Only one page is mapped. If it crosses 
the boundary, __put_kernel_nofault() will fail in a controlled manner.
I see no point in doing the check before every write.

Requiring an alignment to the patch size may be problematic when 
patching prefixed instructions (8 bytes). Although they are guaranteed 
to lie in a single cache line hence a single page, they are not 
guaranteed to be aligned to more than 4 bytes.

And while you are thinking about alignment, don't forget that dcbst and 
icbi apply on a given cacheline. If your memory crosses a cacheline you 
may have a problem.

> 
>>> +               case 8:
>>> +                       __put_kernel_nofault(dest, src, u64,
>>> failed);
>>> +                       break;
>>
>> Is case 8 needed for PPC32 ?
> 
> I don't have a particular need for it, but the underlying
> __put_kernel_nofault is capable of it so I included it.

Well, not including it will allow you to pass the source as a 'long' as 
mentioned above.

> 
>>> +       }
>>
>> Do you catch it when size if none of 1,2,4,8 ?
>>
> 
> Not yet. Perhaps I should wrap patch_text_data in a macro that checks
> the size with BUILD_BUG_ON? I'd rather not check at runtime.

Not necessarily a macro. WOuld be better as a static __always_inline in 
code_patching.h

> 
>>> +
>>> +       asm ("dcbst 0, %0; sync" :: "r" (dest));
>>
>> Maybe write it in C:
>>
>>          dcbst(dest);
>>          mb(); /* sync */
>>
>>> +
>>> +       if (is_exec)
>>> +               asm ("icbi 0,%0; sync; isync" :: "r" (exec_addr));
>>
>> Same, can be:
>>
>>          if (is_exec) {
>>                  icbi(exec_addr);
>>                  mb(); /* sync */
>>                  isync();
>>          }
>>
>> Or keep it flat:
>>
>>          if (!is_exec)
>>                  return 0;
>>
>>          icbi(exec_addr);
>>          mb(); /* sync */
>>          isync();
>>
>>          return 0;
> 
> Will try this.
> 
>>> +static int do_patch_text(void *dest, const void *src, size_t size,
>>> bool is_exec)
>>> +{
>>> +       int err;
>>> +       pte_t *pte;
>>> +       u32 *patch_addr;
>>> +
>>> +       pte = start_text_patch(dest, &patch_addr);
>>> +       err = __patch_text(patch_addr, src, size, is_exec, dest);
>>> +       finish_text_patch(pte);
>>
>> Why do you need to split this function in three parts ? I can't see
>> the
>> added value, all it does is reduce readability.
> 
> It made it more readable to me, so the __patch_text didn't get buried.
> It also made it easier to do the refactoring, and potentially add code
> patching variants that use the poke area but not __patch_text. I'll
> remove it for v2 though given this is the only use right now.
> 
>> Did you check the impact of calling __this_cpu_read() twice ?
> 
> I wasn't concerned about performance, but given I'll merge it back
> again it will only be read once in v2 again.
> 
>>> +void *patch_memory(void *dest, const void *src, size_t size)
>>
>> What is this function used for ?
>>
> 
> Build failure apparently :)
> 
> It's removed in v2.
>>
Benjamin Gray Sept. 20, 2022, 1:49 a.m. UTC | #7
On Mon, 2022-09-19 at 06:38 +0000, Christophe Leroy wrote:
> Le 16/09/2022 à 08:23, Benjamin Gray a écrit :
> > [...]
> > +       /* Make sure we aren't patching a freed init section */
> > +       if (static_branch_likely(&init_mem_is_free) &&
> > init_section_contains(dest, 4))
> > +               return 0;
> > +
> >         if (!static_branch_likely(&poking_init_done))
> > -               return raw_patch_instruction(addr, instr);
> > +               return __patch_text(dest, src, size, is_exec,
> > dest);
> >   
> 
> Test ordering looks odd. How can init_mem_is_free be true and 
> poking_init_done be false ?
> > 
That's just the flattened result of the original STRICT_KERNEL_RWX case
(as I hadn't tested building with it disabled).

With the build fixed for v2, the structure is similar to the original;
the second check is not performed if CONFIG_STRICT_KERNEL_RWX is
disabled.
>
Benjamin Gray Sept. 20, 2022, 2:32 a.m. UTC | #8
On Mon, 2022-09-19 at 07:16 +0000, Christophe Leroy wrote:
> Why would it be unpredictable ? Only one page is mapped. If it
> crosses 
> the boundary, __put_kernel_nofault() will fail in a controled manner.
> I see no point in doing the check before every write.

Oh I didn't see that get_vm_area automatically adds a guard. You're
right then, it's redundant. I had assumed there could be a writeable
mapping directly after.

> And while you are thinking about alignment, don't forget that dcbst
> and 
> icbi apply on a give cacheline. If your memory crosses a cacheline
> you 
> may have a problem.

Yeah, though this applies to the existing patch_instruction too (in
theory; prefixed instructions cannot cross a 64 byte boundary, but the
ISA does not specify minimum cacheline size). I don't have a nice
solution right now though. The flush needs to be done on the effective
address (i.e. text poke address) according to the ISA, but the text
poke address is only valid within the IRQ save region. So non-prefixed
instruction patching would either pay for some kind of check, or need
special casing.

Maybe an "is aligned" flag in a generic __patch_text to make the extra
flush conditional is good enough?


Related to EA based flushing, data patching ought to run 'dcbst' on the
'exec_addr' too. So given the icache flush only needs to be applied to
instruction patching, and data flush only to data patching, I plan to
move those exec_addr syncs outside of __patch_text to the relevant
public instruction/data specific entry points.

> > > > +               case 8:
> > > > +                       __put_kernel_nofault(dest, src, u64,
> > > > failed);
> > > > +                       break;
> > > 
> > > Is case 8 needed for PPC32 ?
> > 
> > I don't have a particular need for it, but the underlying
> > __put_kernel_nofault is capable of it so I included it.
> 
> Well, not including it will allow you to pass the source as a 'long'
> as 
> mentioned above.

I checked the machine code of a 32 bit build, but it still passes the
pointer in a register. I also checked all 3 ABI docs and they say a
pointer is the same size as a long. Could you clarify when a pointer is
passed on the stack but not a long?

Or do you mean that we could pass the pointed-to data in a register and
skip the pointer altogether? That seems like a good choice, but
__put_kernel_nofault takes a pointer to the source and the
implementation is very complex. I don't think I can safely write the
modified version we'd need at this point.
Christophe Leroy Sept. 20, 2022, 5:44 a.m. UTC | #9
Le 20/09/2022 à 04:32, Benjamin Gray a écrit :
> On Mon, 2022-09-19 at 07:16 +0000, Christophe Leroy wrote:
>> Why would it be unpredictable ? Only one page is mapped. If it
>> crosses
>> the boundary, __put_kernel_nofault() will fail in a controled manner.
>> I see no point in doing the check before every write.
> 
> Oh I didn't see that get_vm_area automatically adds a guard. You're
> right then, it's redundant. I had assumed there could be a writeable
> mapping directly after.
> 
>> And while you are thinking about alignment, don't forget that dcbst
>> and
>> icbi apply on a given cacheline. If your memory crosses a cacheline
>> you
>> may have a problem.
> 
> Yeah, though this applies to the existing patch_instruction too (in
> theory; prefixed instructions cannot cross a 64 byte boundary, but the
> ISA does not specify minimum cacheline size). I don't have a nice
> solution right now though. The flush needs to be done on the effective
> address (i.e. text poke address) according to the ISA, but the text
> poke address is only valid within the IRQ save region. So non-prefixed
> instruction patching would either pay for some kind of check, or need
> special casing.

As far as I know, cachelines are minimum 64 bytes on PPC64 aren't they ?

> 
> Maybe an "is aligned" flag in a generic __patch_text to make the extra
> flush conditional is good enough?

Well, if the cacheline is already flushed, the operation will be a nop 
anyway so it shouldn't cost much to flush both addresses, after all it 
is the sync that costs and you'll still have only one.

> 
> 
> Related to EA based flushing, data patching ought to run 'dcbst' on the
> 'exec_addr' too. So given the icache flush only needs to be applied to
> instruction patching, and data flush only to data patching, I plan to
> move those exec_addr syncs outside of __patch_text to the relevant
> public instruction/data specific entry points.

Why should it run 'dcbst' on the 'exec_addr' at all ? We have not 
written anything there.

Anyway, powerpc handles cachelines by physical address, so it doesn't
matter which EA you use, as long as it is done.

And dcbst is handled as a write by the MMU, so you just can't apply it 
on the read-only exec address.


> 
>>>>> +               case 8:
>>>>> +                       __put_kernel_nofault(dest, src, u64,
>>>>> failed);
>>>>> +                       break;
>>>>
>>>> Is case 8 needed for PPC32 ?
>>>
>>> I don't have a particular need for it, but the underlying
>>> __put_kernel_nofault is capable of it so I included it.
>>
>> Well, not including it will allow you to pass the source as a 'long'
>> as
>> mentioned above.
> 
> I checked the machine code of a 32 bit build, but it still passes the
> pointer in a register. I also checked all 3 ABI docs and they say a
> pointer is the same size as a long. Could you clarify when a pointer is
> passed on the stack but not a long?
> 
> Or do you mean that we could pass the pointed-to data in a register and
> skip the pointer altogether? That seems like a good choice, but
> __put_kernel_nofault takes a pointer to the source and the
> implementation is very complex. I don't think I can safely write the
> modified version we'd need at this point.

Yes I meant to pass the value instead of passing a pointer to the value.
When you pass a pointer to a value, it forces gcc to put that value in
memory, namely on the stack, while when you pass the value directly,
it gets passed in a register.

So I think you have to pass the value and only change it to a pointer to 
that value at the time you are calling __put_kernel_nofault(). That way 
gcc is able to handle it efficiently and most of the time avoids going
via memory.

Today raw_patch_instruction() is :

c0017ebc <raw_patch_instruction>:
c0017ebc:	90 83 00 00 	stw     r4,0(r3)
c0017ec0:	7c 00 18 6c 	dcbst   0,r3
c0017ec4:	7c 00 04 ac 	hwsync
c0017ec8:	7c 00 1f ac 	icbi    0,r3
c0017ecc:	7c 00 04 ac 	hwsync
c0017ed0:	4c 00 01 2c 	isync
c0017ed4:	38 60 00 00 	li      r3,0
c0017ed8:	4e 80 00 20 	blr
c0017edc:	38 60 ff ff 	li      r3,-1
c0017ee0:	4e 80 00 20 	blr

Here r4 is the value to be written.


With your patch, extract from __patch_text() is:

c0017fdc:	28 05 00 04 	cmplwi  r5,4
c0017fe0:	41 82 00 74 	beq     c0018054 <__patch_text+0x98>
c0017fe4:	41 81 00 40 	bgt     c0018024 <__patch_text+0x68>
c0017fe8:	28 05 00 01 	cmplwi  r5,1
c0017fec:	41 82 00 74 	beq     c0018060 <__patch_text+0xa4>
c0017ff0:	28 05 00 02 	cmplwi  r5,2
c0017ff4:	40 82 00 0c 	bne     c0018000 <__patch_text+0x44>
c0017ff8:	a1 24 00 00 	lhz     r9,0(r4)
c0017ffc:	b1 23 00 00 	sth     r9,0(r3)
c0018000:	7c 00 18 6c 	dcbst   0,r3
c0018004:	7c 00 04 ac 	hwsync
c0018008:	2c 06 00 00 	cmpwi   r6,0
c001800c:	38 60 00 00 	li      r3,0
c0018010:	4d 82 00 20 	beqlr
c0018014:	7c 00 3f ac 	icbi    0,r7
c0018018:	7c 00 04 ac 	hwsync
c001801c:	4c 00 01 2c 	isync
c0018020:	4e 80 00 20 	blr
c0018024:	28 05 00 08 	cmplwi  r5,8
c0018028:	40 a2 ff d8 	bne     c0018000 <__patch_text+0x44>
c001802c:	81 44 00 00 	lwz     r10,0(r4)
c0018030:	81 64 00 04 	lwz     r11,4(r4)
c0018034:	91 43 00 00 	stw     r10,0(r3)
c0018038:	91 63 00 04 	stw     r11,4(r3)
c001803c:	7c 00 18 6c 	dcbst   0,r3
c0018040:	7c 00 04 ac 	hwsync
c0018044:	2c 06 00 00 	cmpwi   r6,0
c0018048:	38 60 00 00 	li      r3,0
c001804c:	4d 82 00 20 	beqlr
c0018050:	4b ff ff c4 	b       c0018014 <__patch_text+0x58>
c0018054:	81 24 00 00 	lwz     r9,0(r4)
c0018058:	91 23 00 00 	stw     r9,0(r3)
c001805c:	4b ff ff a4 	b       c0018000 <__patch_text+0x44>
c0018060:	89 24 00 00 	lbz     r9,0(r4)
c0018064:	99 23 00 00 	stb     r9,0(r3)
c0018068:	4b ff ff 98 	b       c0018000 <__patch_text+0x44>
c001806c:	38 60 ff ff 	li      r3,-1
c0018070:	4e 80 00 20 	blr
c0018074:	38 60 ff f2 	li      r3,-14
c0018078:	4e 80 00 20 	blr

So as you can see, r4 now is a memory pointer and the data has to be 
loaded from there.


Christophe
Benjamin Gray Sept. 20, 2022, 7:01 a.m. UTC | #10
On Tue, 2022-09-20 at 05:44 +0000, Christophe Leroy wrote:
> 
> As far as I know, cachelines are minimum 64 bytes on PPC64 aren't
> they ?

In practice maybe. I don't know what the convention is in the kernel in
cases where the ISA is less specific than what the supported machines
do.

> > Related to EA based flushing, data patching ought to run 'dcbst' on
> > the
> > 'exec_addr' too. So given the icache flush only needs to be applied
> > to
> > instruction patching, and data flush only to data patching, I plan
> > to
> > move those exec_addr syncs outside of __patch_text to the relevant
> > public instruction/data specific entry points.
> 
> Why should it run 'dcbst' on the 'exec_addr' at all ? We have not 
> written anything there.
> 
> Anyway, powerpc handles cachelines by physical address, so no matter 
> which EA you use as far as it is done.
> 
> And dcbst is handled as a write by the MMU, so you just can't apply
> it 
> on the read-only exec address.

I was talking with Michael Ellerman and he said that, for instructions
at least, the cache operations apply to the EA. So the exec address and
the text poke address are not interchangeable. Flushing the icache
needs to be done on the exec address, not the text poke address.

The ISA uses identical wording for icache and dcache blocks ("block
containing the byte addressed by EA"), so I assume the same applies for
data too. I am trying to flush a cached value for the data EA to ensure
that the value in main memory from the text-poke EA is correctly loaded
(that's the goal, I guess that was the wrong instruction).

Or given that multiple processes sharing a physical page for RW data is
a common scenario, it could just be expected that the hardware handles
virtual address aliases for data cache.

So I don't know, and the ISA doesn't look any different to me. I'll
need some kind of confirmation either way on this.

> Today raw_patch_instruction() is :
> 
> c0017ebc <raw_patch_instruction>:
> c0017ebc:       90 83 00 00     stw     r4,0(r3)
> c0017ec0:       7c 00 18 6c     dcbst   0,r3
> c0017ec4:       7c 00 04 ac     hwsync
> c0017ec8:       7c 00 1f ac     icbi    0,r3
> c0017ecc:       7c 00 04 ac     hwsync
> c0017ed0:       4c 00 01 2c     isync
> c0017ed4:       38 60 00 00     li      r3,0
> c0017ed8:       4e 80 00 20     blr
> c0017edc:       38 60 ff ff     li      r3,-1
> c0017ee0:       4e 80 00 20     blr
> 
> Here r4 is the value to be written.
> 
> 
> With your patch, extract from __patch_text() is:
> 
> [...]
> 
> So as you can see, r4 now is a memory pointer and the data has to be 
> loaded from there.

For this version of raw_patch_instruction

/*
 * Sketch of raw_patch_instruction() built on a generic
 * __patch_text(dest, src, size) helper: a non-prefixed instruction is
 * written as a u32, a prefixed one as a u64.
 *
 * NOTE(review): illustration from the discussion; __patch_text here
 * takes three arguments, unlike the five-argument version in the diff
 * below -- presumably the planned v2 signature, to be confirmed.
 */
int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
{
	int err;

	if (!ppc_inst_prefixed(instr)) {
		u32 val = ppc_inst_val(instr);
		err = __patch_text(addr, &val, sizeof(val));
	} else {
		u64 val = ppc_inst_as_ulong(instr);
		err = __patch_text(addr, &val, sizeof(val));
	}

	/* Make the new instruction visible to instruction fetch:
	 * invalidate the icache block for the patched address, then
	 * sync + isync. Runs even if the write above failed. */
	icbi(addr);
	mb(); /* sync */
	isync();

	return err;
}

This is the 32-bit machine code my compiler generated

c0040994 <raw_patch_instruction>:
c0040994:   7c 69 1b 78     mr      r9,r3
c0040998:   90 83 00 00     stw     r4,0(r3)
c004099c:   7c 00 18 6c     dcbst   0,r3
c00409a0:   7c 00 04 ac     hwsync
c00409a4:   38 60 00 00     li      r3,0
c00409a8:   7c 00 4f ac     icbi    0,r9
c00409ac:   7c 00 04 ac     hwsync
c00409b0:   4c 00 01 2c     isync
c00409b4:   4e 80 00 20     blr
c00409b8:   38 60 ff ff     li      r3,-1
c00409bc:   4b ff ff ec     b       c00409a8
                                      <raw_patch_instruction+0x14>

It seems GCC is able to use the register automatically. But I agree
that the __patch_text generation is better, and GCC can automatically
elide the pointer stuff, so will change for v2.
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 1c6316ec4b74..6a52c19dae46 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -76,6 +76,7 @@  int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
 int patch_branch(u32 *addr, unsigned long target, int flags);
 int patch_instruction(u32 *addr, ppc_inst_t instr);
 int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
+int patch_text_data(void *dest, const void *src, size_t size);
 
 static inline unsigned long patch_site_addr(s32 *site)
 {
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index ad0cf3108dd0..a09a0898c2ce 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -3,6 +3,7 @@ 
  *  Copyright 2008 Michael Ellerman, IBM Corporation.
  */
 
+#include <linux/mm.h>
 #include <linux/kprobes.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
@@ -14,32 +15,7 @@ 
 #include <asm/page.h>
 #include <asm/code-patching.h>
 #include <asm/inst.h>
-
-static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
-{
-	if (!ppc_inst_prefixed(instr)) {
-		u32 val = ppc_inst_val(instr);
-
-		__put_kernel_nofault(patch_addr, &val, u32, failed);
-	} else {
-		u64 val = ppc_inst_as_ulong(instr);
-
-		__put_kernel_nofault(patch_addr, &val, u64, failed);
-	}
-
-	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
-							    "r" (exec_addr));
-
-	return 0;
-
-failed:
-	return -EPERM;
-}
-
-int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
-{
-	return __patch_instruction(addr, instr, addr);
-}
+#include <asm/cacheflush.h>
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
@@ -147,16 +123,44 @@  static void unmap_patch_area(unsigned long addr)
 	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 }
 
-static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
+static int __patch_text(void *dest, const void *src, size_t size, bool is_exec, void *exec_addr)
+{
+	if (virt_to_pfn(dest) != virt_to_pfn(dest + size - 1))
+		return -EFAULT;
+
+	switch (size) {
+		case 1:
+			__put_kernel_nofault(dest, src, u8, failed);
+			break;
+		case 2:
+			__put_kernel_nofault(dest, src, u16, failed);
+			break;
+		case 4:
+			__put_kernel_nofault(dest, src, u32, failed);
+			break;
+		case 8:
+			__put_kernel_nofault(dest, src, u64, failed);
+			break;
+	}
+
+	asm ("dcbst 0, %0; sync" :: "r" (dest));
+
+	if (is_exec)
+		asm ("icbi 0,%0; sync; isync" :: "r" (exec_addr));
+
+	return 0;
+
+failed:
+	return -EPERM;
+}
+
+static pte_t *start_text_patch(void* dest, u32 **patch_addr)
 {
-	int err;
-	u32 *patch_addr;
-	unsigned long text_poke_addr;
 	pte_t *pte;
-	unsigned long pfn = get_patch_pfn(addr);
+	unsigned long text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
+	unsigned long pfn = get_patch_pfn(dest);
 
-	text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
-	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+	*patch_addr = (u32 *)(text_poke_addr + offset_in_page(dest));
 
 	pte = virt_to_kpte(text_poke_addr);
 	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
@@ -164,33 +168,63 @@  static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 	if (radix_enabled())
 		asm volatile("ptesync": : :"memory");
 
-	err = __patch_instruction(addr, instr, patch_addr);
+	return pte;
+}
 
+static void finish_text_patch(pte_t *pte)
+{
+	unsigned long text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
 	pte_clear(&init_mm, text_poke_addr, pte);
 	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+}
+
+static int do_patch_text(void *dest, const void *src, size_t size, bool is_exec)
+{
+	int err;
+	pte_t *pte;
+	u32 *patch_addr;
+
+	pte = start_text_patch(dest, &patch_addr);
+	err = __patch_text(patch_addr, src, size, is_exec, dest);
+	finish_text_patch(pte);
 
 	return err;
 }
 
-static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
+static int patch_text(void *dest, const void *src, size_t size, bool is_exec)
 {
 	int err;
 	unsigned long flags;
 
-	/*
-	 * During early early boot patch_instruction is called
-	 * when text_poke_area is not ready, but we still need
-	 * to allow patching. We just do the plain old patching
-	 */
+	/* Make sure we aren't patching a freed init section */
+	if (static_branch_likely(&init_mem_is_free) && init_section_contains(dest, 4))
+		return 0;
+
 	if (!static_branch_likely(&poking_init_done))
-		return raw_patch_instruction(addr, instr);
+		return __patch_text(dest, src, size, is_exec, dest);
 
 	local_irq_save(flags);
-	err = __do_patch_instruction(addr, instr);
+	err = do_patch_text(dest, src, size, is_exec);
 	local_irq_restore(flags);
 
 	return err;
 }
+
+int patch_text_data(void *dest, const void *src, size_t size) {
+	return patch_text(dest, src, size, false);
+}
+
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+	if (!ppc_inst_prefixed(instr)) {
+		u32 val = ppc_inst_val(instr);
+		return __patch_text(addr, &val, sizeof(val), true, addr);
+	} else {
+		u64 val = ppc_inst_as_ulong(instr);
+		return __patch_text(addr, &val, sizeof(val), true, addr);
+	}
+}
+
 #else /* !CONFIG_STRICT_KERNEL_RWX */
 
 static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
@@ -198,17 +232,24 @@  static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
 	return raw_patch_instruction(addr, instr);
 }
 
+void *patch_memory(void *dest, const void *src, size_t size)
+{
+	return memcpy(dest, src, size);
+}
+
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
 __ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free);
 
 int patch_instruction(u32 *addr, ppc_inst_t instr)
 {
-	/* Make sure we aren't patching a freed init section */
-	if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4))
-		return 0;
-
-	return do_patch_instruction(addr, instr);
+	if (!ppc_inst_prefixed(instr)) {
+		u32 val = ppc_inst_val(instr);
+		return patch_text(addr, &val, sizeof(val), true);
+	} else {
+		u64 val = ppc_inst_as_ulong(instr);
+		return patch_text(addr, &val, sizeof(val), true);
+	}
 }
 NOKPROBE_SYMBOL(patch_instruction);