diff mbox series

[v1,2/2] powerpc/code-patching: Convert to open_patch_window()/close_patch_window()

Message ID 20240315025937.407590-2-bgray@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series [v1,1/2] powerpc/code-patching: Introduce open_patch_window()/close_patch_window() | expand

Commit Message

Benjamin Gray March 15, 2024, 2:59 a.m. UTC
The existing patching alias page setup and teardown sections can be
simplified to make use of the new open_patch_window() abstraction.

This eliminates the _mm variants of the helpers, consumers no longer
need to check mm_patch_enabled(), and consumers no longer need to worry
about synchronization and flushing beyond the changes they make in the
patching window.

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
---
 arch/powerpc/lib/code-patching.c | 180 +++----------------------------
 1 file changed, 16 insertions(+), 164 deletions(-)

Comments

Christophe Leroy March 15, 2024, 8:38 a.m. UTC | #1
Le 15/03/2024 à 03:59, Benjamin Gray a écrit :
> The existing patching alias page setup and teardown sections can be
> simplified to make use of the new open_patch_window() abstraction.
> 
> This eliminates the _mm variants of the helpers, consumers no longer
> need to check mm_patch_enabled(), and consumers no longer need to worry
> about synchronization and flushing beyond the changes they make in the
> patching window.

With this patch, the time needed to activate or de-activate the function 
tracer is approx. 10% longer on powerpc 8xx.

Christophe
Christophe Leroy March 16, 2024, 9:50 a.m. UTC | #2
Le 15/03/2024 à 09:38, Christophe Leroy a écrit :
> 
> 
> Le 15/03/2024 à 03:59, Benjamin Gray a écrit :
>> The existing patching alias page setup and teardown sections can be
>> simplified to make use of the new open_patch_window() abstraction.
>>
>> This eliminates the _mm variants of the helpers, consumers no longer
>> need to check mm_patch_enabled(), and consumers no longer need to worry
>> about synchronization and flushing beyond the changes they make in the
>> patching window.
> 
> With this patch, the time needed to activate or de-activate function 
> tracer is approx 10% longer on powerpc 8xx.

See below the difference in patch_instruction() before and after your patch, 
for both 4k pages and 16k pages:

16k pages, before your patch:

00000278 <patch_instruction>:
  278:	48 00 00 84 	nop
  27c:	7c e0 00 a6 	mfmsr   r7
  280:	7c 51 13 a6 	mtspr   81,r2
  284:	3d 00 00 00 	lis     r8,0
			286: R_PPC_ADDR16_HA	.data
  288:	39 08 00 00 	addi    r8,r8,0
			28a: R_PPC_ADDR16_LO	.data
  28c:	7c 69 1b 78 	mr      r9,r3
  290:	3d 29 40 00 	addis   r9,r9,16384
  294:	81 48 00 08 	lwz     r10,8(r8)
  298:	55 29 00 22 	clrrwi  r9,r9,14
  29c:	81 08 00 04 	lwz     r8,4(r8)
  2a0:	61 29 01 2d 	ori     r9,r9,301
  2a4:	55 06 00 22 	clrrwi  r6,r8,14
  2a8:	91 2a 00 00 	stw     r9,0(r10)
  2ac:	91 2a 00 04 	stw     r9,4(r10)
  2b0:	91 2a 00 08 	stw     r9,8(r10)
  2b4:	91 2a 00 0c 	stw     r9,12(r10)
  2b8:	50 68 04 be 	rlwimi  r8,r3,0,18,31
  2bc:	90 88 00 00 	stw     r4,0(r8)
  2c0:	7c 00 40 6c 	dcbst   0,r8
  2c4:	7c 00 04 ac 	hwsync
  2c8:	7c 00 1f ac 	icbi    0,r3
  2cc:	7c 00 04 ac 	hwsync
  2d0:	4c 00 01 2c 	isync
  2d4:	38 60 00 00 	li      r3,0
  2d8:	39 20 00 00 	li      r9,0
  2dc:	91 2a 00 00 	stw     r9,0(r10)
  2e0:	91 2a 00 04 	stw     r9,4(r10)
  2e4:	91 2a 00 08 	stw     r9,8(r10)
  2e8:	91 2a 00 0c 	stw     r9,12(r10)
  2ec:	7c 00 32 64 	tlbie   r6,r0
  2f0:	7c 00 04 ac 	hwsync
  2f4:	7c e0 01 24 	mtmsr   r7
  2f8:	4e 80 00 20 	blr

16k pages, after your patch. Now there is a stack frame for the call to 
close_patch_window(), and the branch in close_patch_window() is 
unexpected, since patch_instruction() works on single pages.

0000024c <close_patch_window>:
  24c:	81 23 00 04 	lwz     r9,4(r3)
  250:	39 40 00 00 	li      r10,0
  254:	91 49 00 00 	stw     r10,0(r9)
  258:	91 49 00 04 	stw     r10,4(r9)
  25c:	91 49 00 08 	stw     r10,8(r9)
  260:	91 49 00 0c 	stw     r10,12(r9)
  264:	81 23 00 00 	lwz     r9,0(r3)
  268:	55 2a 00 22 	clrrwi  r10,r9,14
  26c:	39 29 40 00 	addi    r9,r9,16384
  270:	7d 2a 48 50 	subf    r9,r10,r9
  274:	28 09 40 00 	cmplwi  r9,16384
  278:	41 81 00 10 	bgt     288 <close_patch_window+0x3c>
  27c:	7c 00 52 64 	tlbie   r10,r0
  280:	7c 00 04 ac 	hwsync
  284:	4e 80 00 20 	blr
  288:	7c 00 04 ac 	hwsync
  28c:	7c 00 02 e4 	tlbia
  290:	4c 00 01 2c 	isync
  294:	4e 80 00 20 	blr

000002c4 <patch_instruction>:
  2c4:	94 21 ff d0 	stwu    r1,-48(r1)
  2c8:	93 c1 00 28 	stw     r30,40(r1)
  2cc:	48 00 00 ac 	nop
  2d0:	7c 08 02 a6 	mflr    r0
  2d4:	90 01 00 34 	stw     r0,52(r1)
  2d8:	93 e1 00 2c 	stw     r31,44(r1)
  2dc:	7f e0 00 a6 	mfmsr   r31
  2e0:	7c 51 13 a6 	mtspr   81,r2
  2e4:	3d 40 00 00 	lis     r10,0
			2e6: R_PPC_ADDR16_HA	.data
  2e8:	39 4a 00 00 	addi    r10,r10,0
			2ea: R_PPC_ADDR16_LO	.data
  2ec:	7c 69 1b 78 	mr      r9,r3
  2f0:	3d 29 40 00 	addis   r9,r9,16384
  2f4:	81 0a 00 08 	lwz     r8,8(r10)
  2f8:	80 ca 00 04 	lwz     r6,4(r10)
  2fc:	55 29 00 22 	clrrwi  r9,r9,14
  300:	61 29 01 2d 	ori     r9,r9,301
  304:	38 e0 00 00 	li      r7,0
  308:	54 6a 04 be 	clrlwi  r10,r3,18
  30c:	91 28 00 00 	stw     r9,0(r8)
  310:	91 28 00 04 	stw     r9,4(r8)
  314:	91 28 00 08 	stw     r9,8(r8)
  318:	91 28 00 0c 	stw     r9,12(r8)
  31c:	91 01 00 0c 	stw     r8,12(r1)
  320:	90 c1 00 08 	stw     r6,8(r1)
  324:	7d 4a 32 14 	add     r10,r10,r6
  328:	90 e1 00 10 	stw     r7,16(r1)
  32c:	90 e1 00 14 	stw     r7,20(r1)
  330:	90 e1 00 18 	stw     r7,24(r1)
  334:	90 8a 00 00 	stw     r4,0(r10)
  338:	7c 00 50 6c 	dcbst   0,r10
  33c:	7c 00 04 ac 	hwsync
  340:	7c 00 1f ac 	icbi    0,r3
  344:	7c 00 04 ac 	hwsync
  348:	4c 00 01 2c 	isync
  34c:	3b c0 00 00 	li      r30,0
  350:	38 61 00 08 	addi    r3,r1,8
  354:	4b ff fe f9 	bl      24c <close_patch_window>
  358:	7f e0 01 24 	mtmsr   r31
  35c:	80 01 00 34 	lwz     r0,52(r1)
  360:	83 e1 00 2c 	lwz     r31,44(r1)
  364:	7c 08 03 a6 	mtlr    r0
  368:	7f c3 f3 78 	mr      r3,r30
  36c:	83 c1 00 28 	lwz     r30,40(r1)
  370:	38 21 00 30 	addi    r1,r1,48
  374:	4e 80 00 20 	blr

4k pages before your patch:

00000268 <patch_instruction>:
  268:	48 00 00 6c 	nop
  26c:	7c e0 00 a6 	mfmsr   r7
  270:	7c 51 13 a6 	mtspr   81,r2
  274:	3d 40 00 00 	lis     r10,0
			276: R_PPC_ADDR16_HA	.data
  278:	39 4a 00 00 	addi    r10,r10,0
			27a: R_PPC_ADDR16_LO	.data
  27c:	7c 69 1b 78 	mr      r9,r3
  280:	3d 29 40 00 	addis   r9,r9,16384
  284:	81 0a 00 08 	lwz     r8,8(r10)
  288:	55 29 00 26 	clrrwi  r9,r9,12
  28c:	81 4a 00 04 	lwz     r10,4(r10)
  290:	61 29 01 25 	ori     r9,r9,293
  294:	91 28 00 00 	stw     r9,0(r8)
  298:	55 49 00 26 	clrrwi  r9,r10,12
  29c:	50 6a 05 3e 	rlwimi  r10,r3,0,20,31
  2a0:	90 8a 00 00 	stw     r4,0(r10)
  2a4:	7c 00 50 6c 	dcbst   0,r10
  2a8:	7c 00 04 ac 	hwsync
  2ac:	7c 00 1f ac 	icbi    0,r3
  2b0:	7c 00 04 ac 	hwsync
  2b4:	4c 00 01 2c 	isync
  2b8:	38 60 00 00 	li      r3,0
  2bc:	39 40 00 00 	li      r10,0
  2c0:	91 48 00 00 	stw     r10,0(r8)
  2c4:	7c 00 4a 64 	tlbie   r9,r0
  2c8:	7c 00 04 ac 	hwsync
  2cc:	7c e0 01 24 	mtmsr   r7
  2d0:	4e 80 00 20 	blr

4k pages after your patch. Here close_patch_window() is inlined, but the 
test at the end is puzzling: we have been patching a single instruction 
with the assumption that it is properly aligned, so why is there a branch 
to a tlbia when only one page needs to be invalidated?

00000268 <patch_instruction>:
  268:	48 00 00 84 	nop
  26c:	7c c0 00 a6 	mfmsr   r6
  270:	7c 51 13 a6 	mtspr   81,r2
  274:	3d 40 00 00 	lis     r10,0
			276: R_PPC_ADDR16_HA	.data
  278:	39 4a 00 00 	addi    r10,r10,0
			27a: R_PPC_ADDR16_LO	.data
  27c:	7c 69 1b 78 	mr      r9,r3
  280:	3d 29 40 00 	addis   r9,r9,16384
  284:	80 ea 00 08 	lwz     r7,8(r10)
  288:	55 29 00 26 	clrrwi  r9,r9,12
  28c:	81 4a 00 04 	lwz     r10,4(r10)
  290:	61 29 01 25 	ori     r9,r9,293
  294:	54 68 05 3e 	clrlwi  r8,r3,20
  298:	91 27 00 00 	stw     r9,0(r7)
  29c:	7d 08 52 14 	add     r8,r8,r10
  2a0:	90 88 00 00 	stw     r4,0(r8)
  2a4:	7c 00 40 6c 	dcbst   0,r8
  2a8:	7c 00 04 ac 	hwsync
  2ac:	7c 00 1f ac 	icbi    0,r3
  2b0:	7c 00 04 ac 	hwsync
  2b4:	4c 00 01 2c 	isync
  2b8:	38 60 00 00 	li      r3,0
  2bc:	55 49 00 26 	clrrwi  r9,r10,12
  2c0:	39 4a 10 00 	addi    r10,r10,4096
  2c4:	7d 49 50 50 	subf    r10,r9,r10
  2c8:	28 0a 10 00 	cmplwi  r10,4096
  2cc:	39 40 00 00 	li      r10,0
  2d0:	91 47 00 00 	stw     r10,0(r7)
  2d4:	40 81 00 38 	ble     30c <patch_instruction+0xa4>
  2d8:	7c 00 04 ac 	hwsync
  2dc:	7c 00 02 e4 	tlbia
  2e0:	4c 00 01 2c 	isync
  2e4:	7c c0 01 24 	mtmsr   r6
  2e8:	4e 80 00 20 	blr
...
  30c:	7c 00 4a 64 	tlbie   r9,r0
  310:	7c 00 04 ac 	hwsync
  314:	7c c0 01 24 	mtmsr   r6
  318:	4e 80 00 20 	blr
Christophe Leroy March 16, 2024, 10:10 a.m. UTC | #3
Le 15/03/2024 à 09:38, Christophe Leroy a écrit :
> 
> 
> Le 15/03/2024 à 03:59, Benjamin Gray a écrit :
>> The existing patching alias page setup and teardown sections can be
>> simplified to make use of the new open_patch_window() abstraction.
>>
>> This eliminates the _mm variants of the helpers, consumers no longer
>> need to check mm_patch_enabled(), and consumers no longer need to worry
>> about synchronization and flushing beyond the changes they make in the
>> patching window.
> 
> With this patch, the time needed to activate or de-activate function 
> tracer is approx 10% longer on powerpc 8xx.

With the following changes, the performance is restored:

diff --git a/arch/powerpc/lib/code-patching.c 
b/arch/powerpc/lib/code-patching.c
index fd6f8576033a..bc92b85913d8 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -282,13 +282,13 @@ struct patch_window {
   * Interrupts must be disabled for the entire duration of the 
patching. The PIDR
   * is potentially changed during this time.
   */
-static int open_patch_window(void *addr, struct patch_window *ctx)
+static __always_inline int open_patch_window(void *addr, struct 
patch_window *ctx)
  {
  	unsigned long pfn = get_patch_pfn(addr);

  	lockdep_assert_irqs_disabled();

-	ctx->text_poke_addr = (unsigned 
long)__this_cpu_read(cpu_patching_context.addr);
+	ctx->text_poke_addr = (unsigned 
long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;

  	if (!mm_patch_enabled()) {
  		ctx->ptep = __this_cpu_read(cpu_patching_context.pte);
@@ -331,7 +331,7 @@ static int open_patch_window(void *addr, struct 
patch_window *ctx)
  	return 0;
  }

-static void close_patch_window(struct patch_window *ctx)
+static __always_inline void close_patch_window(struct patch_window *ctx)
  {
  	lockdep_assert_irqs_disabled();
Benjamin Gray March 17, 2024, 9:34 p.m. UTC | #4
On Sat, 2024-03-16 at 10:10 +0000, Christophe Leroy wrote:
> 
> 
> Le 15/03/2024 à 09:38, Christophe Leroy a écrit :
> > 
> > 
> > Le 15/03/2024 à 03:59, Benjamin Gray a écrit :
> > > The existing patching alias page setup and teardown sections can
> > > be
> > > simplified to make use of the new open_patch_window()
> > > abstraction.
> > > 
> > > This eliminates the _mm variants of the helpers, consumers no
> > > longer
> > > need to check mm_patch_enabled(), and consumers no longer need to
> > > worry
> > > about synchronization and flushing beyond the changes they make
> > > in the
> > > patching window.
> > 
> > With this patch, the time needed to activate or de-activate
> > function 
> > tracer is approx 10% longer on powerpc 8xx.
> 
> With the following changes, the performance is restored:
> 
> diff --git a/arch/powerpc/lib/code-patching.c 
> b/arch/powerpc/lib/code-patching.c
> index fd6f8576033a..bc92b85913d8 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -282,13 +282,13 @@ struct patch_window {
>    * Interrupts must be disabled for the entire duration of the 
> patching. The PIDR
>    * is potentially changed during this time.
>    */
> -static int open_patch_window(void *addr, struct patch_window *ctx)
> +static __always_inline int open_patch_window(void *addr, struct 
> patch_window *ctx)
>   {
>   	unsigned long pfn = get_patch_pfn(addr);
> 
>   	lockdep_assert_irqs_disabled();
> 
> -	ctx->text_poke_addr = (unsigned 
> long)__this_cpu_read(cpu_patching_context.addr);
> +	ctx->text_poke_addr = (unsigned 
> long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
> 
>   	if (!mm_patch_enabled()) {
>   		ctx->ptep =
> __this_cpu_read(cpu_patching_context.pte);
> @@ -331,7 +331,7 @@ static int open_patch_window(void *addr, struct 
> patch_window *ctx)
>   	return 0;
>   }
> 
> -static void close_patch_window(struct patch_window *ctx)
> +static __always_inline void close_patch_window(struct patch_window
> *ctx)
>   {
>   	lockdep_assert_irqs_disabled();
> 
> 

Thanks for checking that. I did restore the page mask optimisation in a
later patch while still developing, but the 64-bit assembly looked
slightly worse for it. I didn't check the 32-bit assembly; I have no way
to benchmark it anyway.
diff mbox series

Patch

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d1b812f84154..fd6f8576033a 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -66,40 +66,6 @@  static bool mm_patch_enabled(void)
 	return IS_ENABLED(CONFIG_SMP) && radix_enabled();
 }
 
-/*
- * The following applies for Radix MMU. Hash MMU has different requirements,
- * and so is not supported.
- *
- * Changing mm requires context synchronising instructions on both sides of
- * the context switch, as well as a hwsync between the last instruction for
- * which the address of an associated storage access was translated using
- * the current context.
- *
- * switch_mm_irqs_off() performs an isync after the context switch. It is
- * the responsibility of the caller to perform the CSI and hwsync before
- * starting/stopping the temp mm.
- */
-static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
-{
-	struct mm_struct *orig_mm = current->active_mm;
-
-	lockdep_assert_irqs_disabled();
-	switch_mm_irqs_off(orig_mm, temp_mm, current);
-
-	WARN_ON(!mm_is_thread_local(temp_mm));
-
-	suspend_breakpoints();
-	return orig_mm;
-}
-
-static void stop_using_temp_mm(struct mm_struct *temp_mm,
-			       struct mm_struct *orig_mm)
-{
-	lockdep_assert_irqs_disabled();
-	switch_mm_irqs_off(temp_mm, orig_mm, current);
-	restore_breakpoints();
-}
-
 static int text_area_cpu_up(unsigned int cpu)
 {
 	struct vm_struct *area;
@@ -389,73 +355,20 @@  static void close_patch_window(struct patch_window *ctx)
 	pte_unmap_unlock(ctx->ptep, ctx->ptl);
 }
 
-static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
-{
-	int err;
-	u32 *patch_addr;
-	unsigned long text_poke_addr;
-	pte_t *pte;
-	unsigned long pfn = get_patch_pfn(addr);
-	struct mm_struct *patching_mm;
-	struct mm_struct *orig_mm;
-	spinlock_t *ptl;
-
-	patching_mm = __this_cpu_read(cpu_patching_context.mm);
-	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
-	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
-
-	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
-	if (!pte)
-		return -ENOMEM;
-
-	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
-
-	/* order PTE update before use, also serves as the hwsync */
-	asm volatile("ptesync": : :"memory");
-
-	/* order context switch after arbitrary prior code */
-	isync();
-
-	orig_mm = start_using_temp_mm(patching_mm);
-
-	err = __patch_instruction(addr, instr, patch_addr);
-
-	/* context synchronisation performed by __patch_instruction (isync or exception) */
-	stop_using_temp_mm(patching_mm, orig_mm);
-
-	pte_clear(patching_mm, text_poke_addr, pte);
-	/*
-	 * ptesync to order PTE update before TLB invalidation done
-	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
-	 */
-	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
-
-	pte_unmap_unlock(pte, ptl);
-
-	return err;
-}
-
 static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 {
-	int err;
+	struct patch_window ctx;
 	u32 *patch_addr;
-	unsigned long text_poke_addr;
-	pte_t *pte;
-	unsigned long pfn = get_patch_pfn(addr);
-
-	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
-	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+	int err;
 
-	pte = __this_cpu_read(cpu_patching_context.pte);
-	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
-	/* See ptesync comment in radix__set_pte_at() */
-	if (radix_enabled())
-		asm volatile("ptesync": : :"memory");
+	err = open_patch_window(addr, &ctx);
+	if (err)
+		return err;
 
+	patch_addr = (u32 *)(ctx.text_poke_addr + offset_in_page(addr));
 	err = __patch_instruction(addr, instr, patch_addr);
 
-	pte_clear(&init_mm, text_poke_addr, pte);
-	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+	close_patch_window(&ctx);
 
 	return err;
 }
@@ -475,10 +388,7 @@  int patch_instruction(u32 *addr, ppc_inst_t instr)
 		return raw_patch_instruction(addr, instr);
 
 	local_irq_save(flags);
-	if (mm_patch_enabled())
-		err = __do_patch_instruction_mm(addr, instr);
-	else
-		err = __do_patch_instruction(addr, instr);
+	err = __do_patch_instruction(addr, instr);
 	local_irq_restore(flags);
 
 	return err;
@@ -545,80 +455,25 @@  static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool rep
 	return err;
 }
 
-/*
- * A page is mapped and instructions that fit the page are patched.
- * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
- */
-static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr)
-{
-	struct mm_struct *patching_mm, *orig_mm;
-	unsigned long pfn = get_patch_pfn(addr);
-	unsigned long text_poke_addr;
-	spinlock_t *ptl;
-	u32 *patch_addr;
-	pte_t *pte;
-	int err;
-
-	patching_mm = __this_cpu_read(cpu_patching_context.mm);
-	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
-	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
-
-	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
-	if (!pte)
-		return -ENOMEM;
-
-	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
-
-	/* order PTE update before use, also serves as the hwsync */
-	asm volatile("ptesync" ::: "memory");
-
-	/* order context switch after arbitrary prior code */
-	isync();
-
-	orig_mm = start_using_temp_mm(patching_mm);
-
-	err = __patch_instructions(patch_addr, code, len, repeat_instr);
-
-	/* context synchronisation performed by __patch_instructions */
-	stop_using_temp_mm(patching_mm, orig_mm);
-
-	pte_clear(patching_mm, text_poke_addr, pte);
-	/*
-	 * ptesync to order PTE update before TLB invalidation done
-	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
-	 */
-	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
-
-	pte_unmap_unlock(pte, ptl);
-
-	return err;
-}
-
 /*
  * A page is mapped and instructions that fit the page are patched.
  * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
  */
 static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
 {
-	unsigned long pfn = get_patch_pfn(addr);
-	unsigned long text_poke_addr;
+	struct patch_window ctx;
 	u32 *patch_addr;
-	pte_t *pte;
 	int err;
 
-	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
-	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
-
-	pte = __this_cpu_read(cpu_patching_context.pte);
-	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
-	/* See ptesync comment in radix__set_pte_at() */
-	if (radix_enabled())
-		asm volatile("ptesync" ::: "memory");
+	err = open_patch_window(addr, &ctx);
+	if (err)
+		return err;
 
+	patch_addr = (u32 *)(ctx.text_poke_addr + offset_in_page(addr));
 	err = __patch_instructions(patch_addr, code, len, repeat_instr);
 
-	pte_clear(&init_mm, text_poke_addr, pte);
-	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+	/* context synchronisation performed by __patch_instructions */
+	close_patch_window(&ctx);
 
 	return err;
 }
@@ -639,10 +494,7 @@  int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
 		plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
 
 		local_irq_save(flags);
-		if (mm_patch_enabled())
-			err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
-		else
-			err = __do_patch_instructions(addr, code, plen, repeat_instr);
+		err = __do_patch_instructions(addr, code, plen, repeat_instr);
 		local_irq_restore(flags);
 		if (err)
 			return err;