
[v3,1/6] powerpc/code-patching: Implement generic text patching function

Message ID 20221005053234.29312-2-bgray@linux.ibm.com (mailing list archive)
State Superseded
Series Out-of-line static calls for powerpc64 ELF V2

Commit Message

Benjamin Gray Oct. 5, 2022, 5:32 a.m. UTC
Adds a generic text patching mechanism for patches of int or long size.

The patch_instruction function is reimplemented in terms of this more
generic function, which allows patching of long-sized data such as
pointers on 64-bit.

On 32-bit, patch_uint is marked noinline to prevent a mis-optimisation.
Without noinline, the compiler may inline the whole call chain inside
patch_branch down to do_patch_memory, which prevents do_patch_memory
from also being inlined into the standalone patch_uint, needlessly
leaving patch_uint as a single branch to do_patch_memory.

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
---
 arch/powerpc/include/asm/code-patching.h | 29 ++++++++++
 arch/powerpc/lib/code-patching.c         | 73 ++++++++++++++++++------
 2 files changed, 85 insertions(+), 17 deletions(-)
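
For illustration, a minimal usage sketch of the new interface (the caller
below is hypothetical and not part of this patch):

	/* Hypothetical caller: update a function-pointer slot via
	 * patch_ulong(). The patched data must not cross a cacheline;
	 * natural alignment of the slot guarantees that.
	 */
	static int update_call_slot(void **slot, void *target)
	{
		if (!IS_ALIGNED((unsigned long)slot, sizeof(*slot)))
			return -EINVAL;

		return patch_ulong(slot, (unsigned long)target);
	}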

Comments

Christophe Leroy Oct. 5, 2022, 5:55 p.m. UTC | #1
Hi,

On 05/10/2022 at 07:32, Benjamin Gray wrote:
> Adds a generic text patching mechanism for patches of int or long size.
> 
> The patch_instruction function is reimplemented in terms of this more
> generic function, which allows patching of long-sized data such as
> pointers on 64-bit.
> 
> On 32-bit, patch_uint is marked noinline to prevent a mis-optimisation.
> Without noinline, the compiler may inline the whole call chain inside
> patch_branch down to do_patch_memory, which prevents do_patch_memory
> from also being inlined into the standalone patch_uint, needlessly
> leaving patch_uint as a single branch to do_patch_memory.

I'm on a business trip this week so I can't test it on hardware, but the
generated code looks horrid and sub-optimal, with a stack frame and so
many registers saved into it. That's mpc885_ads_defconfig built with GCC
12, without modules, without stack protector, with 4k pages.

00000168 <__patch_memory.constprop.0>:
  168:	90 83 00 00 	stw     r4,0(r3)
  16c:	7c 00 18 6c 	dcbst   0,r3
  170:	7c 00 04 ac 	hwsync
  174:	7c 00 2f ac 	icbi    0,r5
  178:	7c 00 04 ac 	hwsync
  17c:	4c 00 01 2c 	isync
  180:	38 60 00 00 	li      r3,0
  184:	4e 80 00 20 	blr
  188:	38 60 ff ff 	li      r3,-1
  18c:	4e 80 00 20 	blr

00000190 <raw_patch_instruction>:
  190:	90 83 00 00 	stw     r4,0(r3)
  194:	7c 00 18 6c 	dcbst   0,r3
  198:	7c 00 04 ac 	hwsync
  19c:	7c 00 1f ac 	icbi    0,r3
  1a0:	7c 00 04 ac 	hwsync
  1a4:	4c 00 01 2c 	isync
  1a8:	38 60 00 00 	li      r3,0
  1ac:	4e 80 00 20 	blr
  1b0:	38 60 ff ff 	li      r3,-1
  1b4:	4e 80 00 20 	blr

000001b8 <patch_uint>:
  1b8:	7c 65 1b 78 	mr      r5,r3
  1bc:	48 00 00 a4 	b       260 <patch_uint+0xa8>
  1c0:	94 21 ff e0 	stwu    r1,-32(r1)
  1c4:	7c 08 02 a6 	mflr    r0
  1c8:	90 01 00 24 	stw     r0,36(r1)
  1cc:	93 81 00 10 	stw     r28,16(r1)
  1d0:	93 a1 00 14 	stw     r29,20(r1)
  1d4:	93 c1 00 18 	stw     r30,24(r1)
  1d8:	93 e1 00 1c 	stw     r31,28(r1)
  1dc:	7f 80 00 a6 	mfmsr   r28
  1e0:	7c 51 13 a6 	mtspr   81,r2
  1e4:	3d 20 00 00 	lis     r9,0
			1e6: R_PPC_ADDR16_HA	.data
  1e8:	81 49 00 00 	lwz     r10,0(r9)
			1ea: R_PPC_ADDR16_LO	.data
  1ec:	3d 20 00 00 	lis     r9,0
			1ee: R_PPC_ADDR16_HA	init_mm+0x24
  1f0:	83 ea 00 04 	lwz     r31,4(r10)
  1f4:	80 e9 00 00 	lwz     r7,0(r9)
			1f6: R_PPC_ADDR16_LO	init_mm+0x24
  1f8:	57 e8 65 3a 	rlwinm  r8,r31,12,20,29
  1fc:	7f a7 40 2e 	lwzx    r29,r7,r8
  200:	7c 69 1b 78 	mr      r9,r3
  204:	3d 29 40 00 	addis   r9,r9,16384
  208:	57 fe b5 3a 	rlwinm  r30,r31,22,20,29
  20c:	55 29 00 26 	clrrwi  r9,r9,12
  210:	61 29 01 25 	ori     r9,r9,293
  214:	57 bd 00 26 	clrrwi  r29,r29,12
  218:	3f de c0 00 	addis   r30,r30,-16384
  21c:	7d 3d f1 2e 	stwx    r9,r29,r30
  220:	53 e3 00 26 	rlwimi  r3,r31,0,0,19
  224:	4b ff ff 45 	bl      168 <__patch_memory.constprop.0>
  228:	39 20 00 00 	li      r9,0
  22c:	7d 3d f1 2e 	stwx    r9,r29,r30
  230:	57 ff 00 26 	clrrwi  r31,r31,12
  234:	7c 00 fa 64 	tlbie   r31,r0
  238:	7c 00 04 ac 	hwsync
  23c:	7f 80 01 24 	mtmsr   r28
  240:	80 01 00 24 	lwz     r0,36(r1)
  244:	83 81 00 10 	lwz     r28,16(r1)
  248:	83 a1 00 14 	lwz     r29,20(r1)
  24c:	83 c1 00 18 	lwz     r30,24(r1)
  250:	83 e1 00 1c 	lwz     r31,28(r1)
  254:	7c 08 03 a6 	mtlr    r0
  258:	38 21 00 20 	addi    r1,r1,32
  25c:	4e 80 00 20 	blr
  260:	4b ff ff 08 	b       168 <__patch_memory.constprop.0>
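
(For anyone wanting to reproduce the listing above, it corresponds to
something like the following, assuming a powerpc cross toolchain; the
exact toolchain prefix will vary:)

	make ARCH=powerpc CROSS_COMPILE=powerpc-linux-gnu- mpc885_ads_defconfig
	make ARCH=powerpc CROSS_COMPILE=powerpc-linux-gnu- arch/powerpc/lib/code-patching.o
	powerpc-linux-gnu-objdump -dr arch/powerpc/lib/code-patching.o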


Christophe


> 
> Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/code-patching.h | 29 ++++++++++
>   arch/powerpc/lib/code-patching.c         | 73 ++++++++++++++++++------
>   2 files changed, 85 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
> index 3f881548fb61..170bfa848c7c 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -72,7 +72,36 @@ static inline int create_branch(ppc_inst_t *instr, const u32 *addr,
>   int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>   		       unsigned long target, int flags);
>   int patch_branch(u32 *addr, unsigned long target, int flags);
> +
> +/* patch_uint and patch_ulong must only be called on addresses where the patch
> + * does not cross a cacheline, otherwise it may not be flushed properly and
> + * mixes of new and stale data may be observed.
> + *
> + * patch_instruction and other instruction patchers automatically satisfy this
> + * requirement due to instruction alignment requirements.
> + */
> +
> +int patch_uint(void *addr, unsigned int val);
> +
> +#ifdef CONFIG_PPC64
> +
> +int patch_ulong(void *addr, unsigned long val);
>   int patch_instruction(u32 *addr, ppc_inst_t instr);
> +
> +#else
> +
> +static inline int patch_ulong(void *addr, unsigned long val)
> +{
> +	return patch_uint(addr, val);
> +}
> +
> +static inline int patch_instruction(u32 *addr, ppc_inst_t instr)
> +{
> +	return patch_uint(addr, ppc_inst_val(instr));
> +}
> +
> +#endif
> +
>   int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
>   
>   static inline unsigned long patch_site_addr(s32 *site)
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index 125c55e3e148..ecdd2e523d9a 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -15,20 +15,24 @@
>   #include <asm/code-patching.h>
>   #include <asm/inst.h>
>   
> -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
> +static int __patch_memory(void *patch_addr, unsigned long val, void *exec_addr,
> +			   bool is_dword)
>   {
> -	if (!ppc_inst_prefixed(instr)) {
> -		u32 val = ppc_inst_val(instr);
> -
> -		__put_kernel_nofault(patch_addr, &val, u32, failed);
> -	} else {
> -		u64 val = ppc_inst_as_ulong(instr);
> +	/* Prefixed instruction may cross cacheline if cacheline smaller than 64 bytes */
> +	BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64);
>   
> +	if (unlikely(is_dword))
>   		__put_kernel_nofault(patch_addr, &val, u64, failed);
> -	}
> +	else
> +		__put_kernel_nofault(patch_addr, &val, u32, failed);
>   
> -	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
> -							    "r" (exec_addr));
> +	/* Assume data is inside a single cacheline */
> +	dcbst(patch_addr);
> +	mb(); /* sync */
> +	/* Flush on the EA that may be executed in case of a non-coherent icache */
> +	icbi(exec_addr);
> +	mb(); /* sync */
> +	isync();
>   
>   	return 0;
>   
> @@ -38,7 +42,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr
>   
>   int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
>   {
> -	return __patch_instruction(addr, instr, addr);
> +	if (ppc_inst_prefixed(instr))
> +		return __patch_memory(addr, ppc_inst_as_ulong(instr), addr, true);
> +	else
> +		return __patch_memory(addr, ppc_inst_val(instr), addr, false);
>   }
>   
>   static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
> @@ -149,7 +156,7 @@ static void unmap_patch_area(unsigned long addr)
>   	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>   }
>   
> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +static int __do_patch_memory(void *addr, unsigned long val, bool is_dword)
>   {
>   	int err;
>   	u32 *patch_addr;
> @@ -166,7 +173,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
>   	if (radix_enabled())
>   		asm volatile("ptesync": : :"memory");
>   
> -	err = __patch_instruction(addr, instr, patch_addr);
> +	err = __patch_memory(patch_addr, val, addr, is_dword);
>   
>   	pte_clear(&init_mm, text_poke_addr, pte);
>   	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> @@ -174,7 +181,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
>   	return err;
>   }
>   
> -int patch_instruction(u32 *addr, ppc_inst_t instr)
> +static int do_patch_memory(void *addr, unsigned long val, bool is_dword)
>   {
>   	int err;
>   	unsigned long flags;
> @@ -186,15 +193,47 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>   	 */
>   	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
>   	    !static_branch_likely(&poking_init_done))
> -		return raw_patch_instruction(addr, instr);
> +		return __patch_memory(addr, val, addr, is_dword);
>   
>   	local_irq_save(flags);
> -	err = __do_patch_instruction(addr, instr);
> +	err = __do_patch_memory(addr, val, is_dword);
>   	local_irq_restore(flags);
>   
>   	return err;
>   }
> -NOKPROBE_SYMBOL(patch_instruction);
> +
> +#ifdef CONFIG_PPC64
> +
> +int patch_uint(void *addr, unsigned int val)
> +{
> +	return do_patch_memory(addr, val, false);
> +}
> +NOKPROBE_SYMBOL(patch_uint);
> +
> +int patch_ulong(void *addr, unsigned long val)
> +{
> +	return do_patch_memory(addr, val, true);
> +}
> +NOKPROBE_SYMBOL(patch_ulong);
> +
> +int patch_instruction(u32 *addr, ppc_inst_t instr)
> +{
> +	if (ppc_inst_prefixed(instr))
> +		return patch_ulong(addr, ppc_inst_as_ulong(instr));
> +	else
> +		return patch_uint(addr, ppc_inst_val(instr));
> +}
> +NOKPROBE_SYMBOL(patch_instruction);
> +
> +#else
> +
> +noinline int patch_uint(void *addr, unsigned int val)
> +{
> +	return do_patch_memory(addr, val, false);
> +}
> +NOKPROBE_SYMBOL(patch_uint);
> +
> +#endif
>   
>   int patch_branch(u32 *addr, unsigned long target, int flags)
>   {
Benjamin Gray Oct. 6, 2022, 3:36 a.m. UTC | #2
On Wed, 2022-10-05 at 17:55 +0000, Christophe Leroy wrote:
> I'm on a business trip this week so I can't test it on hardware, but
> the generated code looks horrid and sub-optimal, with a stack frame
> and so many registers saved into it. That's mpc885_ads_defconfig
> built with GCC 12, without modules, without stack protector, with 4k
> pages.

Yeah, that definitely wasn't supposed to happen. I was looking at the
32- and 64-bit machine code closely while actively writing it, but I
must have refactored it badly when cleaning up for submission. It was
supposed to be inlined automatically, leaving it identical to the
original on 32-bit.

Given that inlining is desirable here, and 64-bit inlines it anyway, I
think I should just mark __patch_memory with __always_inline.
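
As a sketch, that change would be (not a posted revision; signature as
in this patch):

-static int __patch_memory(void *patch_addr, unsigned long val, void *exec_addr,
-			   bool is_dword)
+static __always_inline int __patch_memory(void *patch_addr, unsigned long val,
+					   void *exec_addr, bool is_dword)

This takes the decision away from GCC's size heuristics entirely, so
patch_uint and raw_patch_instruction would each carry their own copy of
the store-and-flush sequence.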
Christophe Leroy Oct. 6, 2022, 9:19 a.m. UTC | #3
On 06/10/2022 at 05:36, Benjamin Gray wrote:
> On Wed, 2022-10-05 at 17:55 +0000, Christophe Leroy wrote:
>> I'm on a business trip this week so I can't test it on hardware, but
>> the generated code looks horrid and sub-optimal, with a stack frame
>> and so many registers saved into it. That's mpc885_ads_defconfig
>> built with GCC 12, without modules, without stack protector, with 4k
>> pages.
> 
> Yeah, that definitely wasn't supposed to happen. I was looking at the
> 32- and 64-bit machine code closely while actively writing it, but I
> must have refactored it badly when cleaning up for submission. It was
> supposed to be inlined automatically, leaving it identical to the
> original on 32-bit.
> 
> Given that inlining is desirable here, and 64-bit inlines it anyway, I
> think I should just mark __patch_memory with __always_inline.

FWIW, I get a far better result with:

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index ba00c550d9d2..447b8de6e427 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -21,7 +21,7 @@ static int __patch_memory(void *patch_addr, unsigned long val, void *exec_addr,
 	/* Prefixed instruction may cross cacheline if cacheline smaller than 64 bytes */
 	BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64);
 
-	if (unlikely(is_dword))
+	if (IS_ENABLED(CONFIG_PPC64) && unlikely(is_dword))
 		__put_kernel_nofault(patch_addr, &val, u64, failed);
 	else
 		__put_kernel_nofault(patch_addr, &val, u32, failed);
Benjamin Gray Oct. 6, 2022, 9:53 p.m. UTC | #4
On Thu, 2022-10-06 at 09:19 +0000, Christophe Leroy wrote:
> 
> 
> On 06/10/2022 at 05:36, Benjamin Gray wrote:
> > On Wed, 2022-10-05 at 17:55 +0000, Christophe Leroy wrote:
> > > I'm on a business trip this week so I can't test it on hardware,
> > > but the generated code looks horrid and sub-optimal, with a stack
> > > frame and so many registers saved into it. That's
> > > mpc885_ads_defconfig built with GCC 12, without modules, without
> > > stack protector, with 4k pages.
> > 
> > Yeah, that definitely wasn't supposed to happen. I was looking at
> > the 32- and 64-bit machine code closely while actively writing it,
> > but I must have refactored it badly when cleaning up for submission.
> > It was supposed to be inlined automatically, leaving it identical to
> > the original on 32-bit.
> > 
> > Given that inlining is desirable here, and 64-bit inlines it anyway,
> > I think I should just mark __patch_memory with __always_inline.
> 
> FWIW, I get a far better result with:
> 
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index ba00c550d9d2..447b8de6e427 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -21,7 +21,7 @@ static int __patch_memory(void *patch_addr, unsigned long val, void *exec_addr,
>  	/* Prefixed instruction may cross cacheline if cacheline smaller than 64 bytes */
>  	BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64);
> 
> -	if (unlikely(is_dword))
> +	if (IS_ENABLED(CONFIG_PPC64) && unlikely(is_dword))
>  		__put_kernel_nofault(patch_addr, &val, u64, failed);
>  	else
>  		__put_kernel_nofault(patch_addr, &val, u32, failed);

That's very interesting, as that's what I had originally. I removed the
IS_ENABLED(CONFIG_PPC64) part of the if condition during submission
cleanup because it should be redundant after constant propagation. The
weird thing is that GCC constant-propagates either way; it just changes
its mind about inlining.

I can add it back, but the optimisation here seems very fragile. It
feels more reliable to just specify the inlining explicitly.
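
For reference, a sketch that makes the intent explicit on both fronts
(combining the two suggestions above; not a posted revision, and the
flush sequence is elided):

	/* __always_inline removes the dependence on GCC's inlining
	 * heuristic; IS_ENABLED(CONFIG_PPC64) makes the u64 branch
	 * provably dead on 32-bit, so the heuristic would not matter
	 * there anyway.
	 */
	static __always_inline int __patch_memory(void *patch_addr, unsigned long val,
						  void *exec_addr, bool is_dword)
	{
		if (IS_ENABLED(CONFIG_PPC64) && unlikely(is_dword))
			__put_kernel_nofault(patch_addr, &val, u64, failed);
		else
			__put_kernel_nofault(patch_addr, &val, u32, failed);

		/* dcbst/sync/icbi/sync/isync flush as in the patch body */
		return 0;

	failed:
		return -EPERM;
	}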

Patch

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 3f881548fb61..170bfa848c7c 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -72,7 +72,36 @@  static inline int create_branch(ppc_inst_t *instr, const u32 *addr,
 int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
 		       unsigned long target, int flags);
 int patch_branch(u32 *addr, unsigned long target, int flags);
+
+/* patch_uint and patch_ulong must only be called on addresses where the patch
+ * does not cross a cacheline, otherwise it may not be flushed properly and
+ * mixes of new and stale data may be observed.
+ *
+ * patch_instruction and other instruction patchers automatically satisfy this
+ * requirement due to instruction alignment requirements.
+ */
+
+int patch_uint(void *addr, unsigned int val);
+
+#ifdef CONFIG_PPC64
+
+int patch_ulong(void *addr, unsigned long val);
 int patch_instruction(u32 *addr, ppc_inst_t instr);
+
+#else
+
+static inline int patch_ulong(void *addr, unsigned long val)
+{
+	return patch_uint(addr, val);
+}
+
+static inline int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+	return patch_uint(addr, ppc_inst_val(instr));
+}
+
+#endif
+
 int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
 
 static inline unsigned long patch_site_addr(s32 *site)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 125c55e3e148..ecdd2e523d9a 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -15,20 +15,24 @@ 
 #include <asm/code-patching.h>
 #include <asm/inst.h>
 
-static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
+static int __patch_memory(void *patch_addr, unsigned long val, void *exec_addr,
+			   bool is_dword)
 {
-	if (!ppc_inst_prefixed(instr)) {
-		u32 val = ppc_inst_val(instr);
-
-		__put_kernel_nofault(patch_addr, &val, u32, failed);
-	} else {
-		u64 val = ppc_inst_as_ulong(instr);
+	/* Prefixed instruction may cross cacheline if cacheline smaller than 64 bytes */
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64);
 
+	if (unlikely(is_dword))
 		__put_kernel_nofault(patch_addr, &val, u64, failed);
-	}
+	else
+		__put_kernel_nofault(patch_addr, &val, u32, failed);
 
-	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
-							    "r" (exec_addr));
+	/* Assume data is inside a single cacheline */
+	dcbst(patch_addr);
+	mb(); /* sync */
+	/* Flush on the EA that may be executed in case of a non-coherent icache */
+	icbi(exec_addr);
+	mb(); /* sync */
+	isync();
 
 	return 0;
 
@@ -38,7 +42,10 @@  static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr
 
 int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
 {
-	return __patch_instruction(addr, instr, addr);
+	if (ppc_inst_prefixed(instr))
+		return __patch_memory(addr, ppc_inst_as_ulong(instr), addr, true);
+	else
+		return __patch_memory(addr, ppc_inst_val(instr), addr, false);
 }
 
 static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
@@ -149,7 +156,7 @@  static void unmap_patch_area(unsigned long addr)
 	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 }
 
-static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
+static int __do_patch_memory(void *addr, unsigned long val, bool is_dword)
 {
 	int err;
 	u32 *patch_addr;
@@ -166,7 +173,7 @@  static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 	if (radix_enabled())
 		asm volatile("ptesync": : :"memory");
 
-	err = __patch_instruction(addr, instr, patch_addr);
+	err = __patch_memory(patch_addr, val, addr, is_dword);
 
 	pte_clear(&init_mm, text_poke_addr, pte);
 	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
@@ -174,7 +181,7 @@  static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 	return err;
 }
 
-int patch_instruction(u32 *addr, ppc_inst_t instr)
+static int do_patch_memory(void *addr, unsigned long val, bool is_dword)
 {
 	int err;
 	unsigned long flags;
@@ -186,15 +193,47 @@  int patch_instruction(u32 *addr, ppc_inst_t instr)
 	 */
 	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
 	    !static_branch_likely(&poking_init_done))
-		return raw_patch_instruction(addr, instr);
+		return __patch_memory(addr, val, addr, is_dword);
 
 	local_irq_save(flags);
-	err = __do_patch_instruction(addr, instr);
+	err = __do_patch_memory(addr, val, is_dword);
 	local_irq_restore(flags);
 
 	return err;
 }
-NOKPROBE_SYMBOL(patch_instruction);
+
+#ifdef CONFIG_PPC64
+
+int patch_uint(void *addr, unsigned int val)
+{
+	return do_patch_memory(addr, val, false);
+}
+NOKPROBE_SYMBOL(patch_uint);
+
+int patch_ulong(void *addr, unsigned long val)
+{
+	return do_patch_memory(addr, val, true);
+}
+NOKPROBE_SYMBOL(patch_ulong);
+
+int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+	if (ppc_inst_prefixed(instr))
+		return patch_ulong(addr, ppc_inst_as_ulong(instr));
+	else
+		return patch_uint(addr, ppc_inst_val(instr));
+}
+NOKPROBE_SYMBOL(patch_instruction);
+
+#else
+
+noinline int patch_uint(void *addr, unsigned int val)
+{
+	return do_patch_memory(addr, val, false);
+}
+NOKPROBE_SYMBOL(patch_uint);
+
+#endif
 
 int patch_branch(u32 *addr, unsigned long target, int flags)
 {