Message ID | 20221010002957.128276-2-bgray@linux.ibm.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | Out-of-line static calls for powerpc64 ELF V2 | expand |
On Mon, 2022-10-10 at 11:29 +1100, Benjamin Gray wrote: > Adds a generic memory patching mechanism for patches of size int or > long > bytes. > > The patch_instruction function is reimplemented in terms of this > more generic function. This generic implementation allows patching of > arbitrary long data, such as pointers on 64-bit. As a performance > optimisation the patch will unconditionally flush the icache, as > patch_instruction is used significantly more often and in more > performance sensitive contexts (e.g., ftrace activation). > > On 32-bit patch_int is marked noinline to prevent a mis-optimisation. > Without noinline, inside patch_branch the compiler may inline all the > way to do_patch_memory, preventing the compiler from inlining > do_patch_memory into patch_int. This would needlessly force patch_int > to be a branch to do_patch_memory. > > The 'IS_ENABLED(CONFIG_PPC64) && ' part of the patch size condition > helps GCC inline __patch_memory properly. Only 64-bit requires > doubleword writes anyway, as ints and longs are the same size on 32- > bit. > > Signed-off-by: Benjamin Gray <bgray@linux.ibm.com> It looks like this needs rebasing. Otherwise I gave this a read over and didn't spot anything, but I'll hold off on a reviewed-by tag until it's rebased. 
> --- > arch/powerpc/include/asm/code-patching.h | 29 ++++++++++ > arch/powerpc/lib/code-patching.c | 73 ++++++++++++++++++---- > -- > 2 files changed, 85 insertions(+), 17 deletions(-) > > diff --git a/arch/powerpc/include/asm/code-patching.h > b/arch/powerpc/include/asm/code-patching.h > index 3f881548fb61..170bfa848c7c 100644 > --- a/arch/powerpc/include/asm/code-patching.h > +++ b/arch/powerpc/include/asm/code-patching.h > @@ -72,7 +72,36 @@ static inline int create_branch(ppc_inst_t *instr, > const u32 *addr, > int create_cond_branch(ppc_inst_t *instr, const u32 *addr, > unsigned long target, int flags); > int patch_branch(u32 *addr, unsigned long target, int flags); > + > +/* patch_uint and patch_ulong must only be called on addresses where > the patch > + * does not cross a cacheline, otherwise it may not be flushed > properly and > + * mixes of new and stale data may be observed. > + * > + * patch_instruction and other instruction patchers automatically > satisfy this > + * requirement due to instruction alignment requirements. 
> + */ > + > +int patch_uint(void *addr, unsigned int val); > + > +#ifdef CONFIG_PPC64 > + > +int patch_ulong(void *addr, unsigned long val); > int patch_instruction(u32 *addr, ppc_inst_t instr); > + > +#else > + > +static inline int patch_ulong(void *addr, unsigned long val) > +{ > + return patch_uint(addr, val); > +} > + > +static inline int patch_instruction(u32 *addr, ppc_inst_t instr) > +{ > + return patch_uint(addr, ppc_inst_val(instr)); > +} > + > +#endif > + > int raw_patch_instruction(u32 *addr, ppc_inst_t instr); > > static inline unsigned long patch_site_addr(s32 *site) > diff --git a/arch/powerpc/lib/code-patching.c > b/arch/powerpc/lib/code-patching.c > index 125c55e3e148..e41c0ccec79f 100644 > --- a/arch/powerpc/lib/code-patching.c > +++ b/arch/powerpc/lib/code-patching.c > @@ -15,20 +15,24 @@ > #include <asm/code-patching.h> > #include <asm/inst.h> > > -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 > *patch_addr) > +static int __patch_memory(void *patch_addr, unsigned long val, void > *exec_addr, > + bool is_dword) > { > - if (!ppc_inst_prefixed(instr)) { > - u32 val = ppc_inst_val(instr); > - > - __put_kernel_nofault(patch_addr, &val, u32, failed); > - } else { > - u64 val = ppc_inst_as_ulong(instr); > + /* Prefixed instruction may cross cacheline if cacheline > smaller than 64 bytes */ > + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < > 64); > > + if (IS_ENABLED(CONFIG_PPC64) && unlikely(is_dword)) > __put_kernel_nofault(patch_addr, &val, u64, failed); > - } > + else > + __put_kernel_nofault(patch_addr, &val, u32, failed); > > - asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" > (patch_addr), > - "r" > (exec_addr)); > + /* Assume data is inside a single cacheline */ > + dcbst(patch_addr); > + mb(); /* sync */ > + /* Flush on the EA that may be executed in case of a non- > coherent icache */ > + icbi(exec_addr); > + mb(); /* sync */ > + isync(); > > return 0; > > @@ -38,7 +42,10 @@ static int 
__patch_instruction(u32 *exec_addr, > ppc_inst_t instr, u32 *patch_addr > > int raw_patch_instruction(u32 *addr, ppc_inst_t instr) > { > - return __patch_instruction(addr, instr, addr); > + if (ppc_inst_prefixed(instr)) > + return __patch_memory(addr, ppc_inst_as_ulong(instr), > addr, true); > + else > + return __patch_memory(addr, ppc_inst_val(instr), > addr, false); > } > > static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); > @@ -149,7 +156,7 @@ static void unmap_patch_area(unsigned long addr) > flush_tlb_kernel_range(addr, addr + PAGE_SIZE); > } > > -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int __do_patch_memory(void *addr, unsigned long val, bool > is_dword) > { > int err; > u32 *patch_addr; > @@ -166,7 +173,7 @@ static int __do_patch_instruction(u32 *addr, > ppc_inst_t instr) > if (radix_enabled()) > asm volatile("ptesync": : :"memory"); > > - err = __patch_instruction(addr, instr, patch_addr); > + err = __patch_memory(patch_addr, val, addr, is_dword); > > pte_clear(&init_mm, text_poke_addr, pte); > flush_tlb_kernel_range(text_poke_addr, text_poke_addr + > PAGE_SIZE); > @@ -174,7 +181,7 @@ static int __do_patch_instruction(u32 *addr, > ppc_inst_t instr) > return err; > } > > -int patch_instruction(u32 *addr, ppc_inst_t instr) > +static int do_patch_memory(void *addr, unsigned long val, bool > is_dword) > { > int err; > unsigned long flags; > @@ -186,15 +193,47 @@ int patch_instruction(u32 *addr, ppc_inst_t > instr) > */ > if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || > !static_branch_likely(&poking_init_done)) > - return raw_patch_instruction(addr, instr); > + return __patch_memory(addr, val, addr, is_dword); > > local_irq_save(flags); > - err = __do_patch_instruction(addr, instr); > + err = __do_patch_memory(addr, val, is_dword); > local_irq_restore(flags); > > return err; > } > -NOKPROBE_SYMBOL(patch_instruction); > + > +#ifdef CONFIG_PPC64 > + > +int patch_uint(void *addr, unsigned int val) > +{ > + return 
do_patch_memory(addr, val, false); > +} > +NOKPROBE_SYMBOL(patch_uint) > + > +int patch_ulong(void *addr, unsigned long val) > +{ > + return do_patch_memory(addr, val, true); > +} > +NOKPROBE_SYMBOL(patch_ulong) > + > +int patch_instruction(u32 *addr, ppc_inst_t instr) > +{ > + if (ppc_inst_prefixed(instr)) > + return patch_ulong(addr, ppc_inst_as_ulong(instr)); > + else > + return patch_uint(addr, ppc_inst_val(instr)); > +} > +NOKPROBE_SYMBOL(patch_instruction) > + > +#else > + > +noinline int patch_uint(void *addr, unsigned int val) > +{ > + return do_patch_memory(addr, val, false); > +} > +NOKPROBE_SYMBOL(patch_uint) > + > +#endif > > int patch_branch(u32 *addr, unsigned long target, int flags) > {
On Mon, 2022-10-10 at 15:45 +1100, Andrew Donnellan wrote: > On Mon, 2022-10-10 at 11:29 +1100, Benjamin Gray wrote: > > Adds a generic memory patching mechanism for patches of size int or > > long > > bytes. > > > > The patch_instruction function is reimplemented in terms of this > > more generic function. This generic implementation allows patching > > of > > arbitrary long data, such as pointers on 64-bit. As a performance > > optimisation the patch will unconditionally flush the icache, as > > patch_instruction is used significantly more often and in more > > performance sensitive contexts (e.g., ftrace activation). > > > > On 32-bit patch_int is marked noinline to prevent a mis- > > optimisation. > > Without noinline, inside patch_branch the compiler may inline all > > the > > way to do_patch_memory, preventing the compiler from inlining > > do_patch_memory into patch_int. This would needlessly force > > patch_int > > to be a branch to do_patch_memory. > > > > The 'IS_ENABLED(CONFIG_PPC64) && ' part of the patch size condition > > helps GCC inline __patch_memory properly. Only 64-bit requires > > doubleword writes anyway, as ints and longs are the same size on > > 32- > > bit. > > > > Signed-off-by: Benjamin Gray <bgray@linux.ibm.com> > > It looks like this needs rebasing. Alternatively, I just need to learn to read cover letters properly and notice that this is based off another series... > > Otherwise I gave this a read over and didn't spot anything, but I'll > hold off on a reviewed-by tag until it's rebased. 
> Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com> > > --- > > arch/powerpc/include/asm/code-patching.h | 29 ++++++++++ > > arch/powerpc/lib/code-patching.c | 73 ++++++++++++++++++-- > > -- > > -- > > 2 files changed, 85 insertions(+), 17 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/code-patching.h > > b/arch/powerpc/include/asm/code-patching.h > > index 3f881548fb61..170bfa848c7c 100644 > > --- a/arch/powerpc/include/asm/code-patching.h > > +++ b/arch/powerpc/include/asm/code-patching.h > > @@ -72,7 +72,36 @@ static inline int create_branch(ppc_inst_t > > *instr, > > const u32 *addr, > > int create_cond_branch(ppc_inst_t *instr, const u32 *addr, > > unsigned long target, int flags); > > int patch_branch(u32 *addr, unsigned long target, int flags); > > + > > +/* patch_uint and patch_ulong must only be called on addresses > > where > > the patch > > + * does not cross a cacheline, otherwise it may not be flushed > > properly and > > + * mixes of new and stale data may be observed. > > + * > > + * patch_instruction and other instruction patchers automatically > > satisfy this > > + * requirement due to instruction alignment requirements. 
> > + */ > > + > > +int patch_uint(void *addr, unsigned int val); > > + > > +#ifdef CONFIG_PPC64 > > + > > +int patch_ulong(void *addr, unsigned long val); > > int patch_instruction(u32 *addr, ppc_inst_t instr); > > + > > +#else > > + > > +static inline int patch_ulong(void *addr, unsigned long val) > > +{ > > + return patch_uint(addr, val); > > +} > > + > > +static inline int patch_instruction(u32 *addr, ppc_inst_t instr) > > +{ > > + return patch_uint(addr, ppc_inst_val(instr)); > > +} > > + > > +#endif > > + > > int raw_patch_instruction(u32 *addr, ppc_inst_t instr); > > > > static inline unsigned long patch_site_addr(s32 *site) > > diff --git a/arch/powerpc/lib/code-patching.c > > b/arch/powerpc/lib/code-patching.c > > index 125c55e3e148..e41c0ccec79f 100644 > > --- a/arch/powerpc/lib/code-patching.c > > +++ b/arch/powerpc/lib/code-patching.c > > @@ -15,20 +15,24 @@ > > #include <asm/code-patching.h> > > #include <asm/inst.h> > > > > -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, > > u32 > > *patch_addr) > > +static int __patch_memory(void *patch_addr, unsigned long val, > > void > > *exec_addr, > > + bool is_dword) > > { > > - if (!ppc_inst_prefixed(instr)) { > > - u32 val = ppc_inst_val(instr); > > - > > - __put_kernel_nofault(patch_addr, &val, u32, > > failed); > > - } else { > > - u64 val = ppc_inst_as_ulong(instr); > > + /* Prefixed instruction may cross cacheline if cacheline > > smaller than 64 bytes */ > > + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < > > 64); > > > > + if (IS_ENABLED(CONFIG_PPC64) && unlikely(is_dword)) > > __put_kernel_nofault(patch_addr, &val, u64, > > failed); > > - } > > + else > > + __put_kernel_nofault(patch_addr, &val, u32, > > failed); > > > > - asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" > > (patch_addr), > > - "r" > > (exec_addr)); > > + /* Assume data is inside a single cacheline */ > > + dcbst(patch_addr); > > + mb(); /* sync */ > > + /* Flush on the EA that may be executed in 
case of a non- > > coherent icache */ > > + icbi(exec_addr); > > + mb(); /* sync */ > > + isync(); > > > > return 0; > > > > @@ -38,7 +42,10 @@ static int __patch_instruction(u32 *exec_addr, > > ppc_inst_t instr, u32 *patch_addr > > > > int raw_patch_instruction(u32 *addr, ppc_inst_t instr) > > { > > - return __patch_instruction(addr, instr, addr); > > + if (ppc_inst_prefixed(instr)) > > + return __patch_memory(addr, > > ppc_inst_as_ulong(instr), > > addr, true); > > + else > > + return __patch_memory(addr, ppc_inst_val(instr), > > addr, false); > > } > > > > static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); > > @@ -149,7 +156,7 @@ static void unmap_patch_area(unsigned long > > addr) > > flush_tlb_kernel_range(addr, addr + PAGE_SIZE); > > } > > > > -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > > +static int __do_patch_memory(void *addr, unsigned long val, bool > > is_dword) > > { > > int err; > > u32 *patch_addr; > > @@ -166,7 +173,7 @@ static int __do_patch_instruction(u32 *addr, > > ppc_inst_t instr) > > if (radix_enabled()) > > asm volatile("ptesync": : :"memory"); > > > > - err = __patch_instruction(addr, instr, patch_addr); > > + err = __patch_memory(patch_addr, val, addr, is_dword); > > > > pte_clear(&init_mm, text_poke_addr, pte); > > flush_tlb_kernel_range(text_poke_addr, text_poke_addr + > > PAGE_SIZE); > > @@ -174,7 +181,7 @@ static int __do_patch_instruction(u32 *addr, > > ppc_inst_t instr) > > return err; > > } > > > > -int patch_instruction(u32 *addr, ppc_inst_t instr) > > +static int do_patch_memory(void *addr, unsigned long val, bool > > is_dword) > > { > > int err; > > unsigned long flags; > > @@ -186,15 +193,47 @@ int patch_instruction(u32 *addr, ppc_inst_t > > instr) > > */ > > if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || > > !static_branch_likely(&poking_init_done)) > > - return raw_patch_instruction(addr, instr); > > + return __patch_memory(addr, val, addr, is_dword); > > > > local_irq_save(flags); > > - err = 
__do_patch_instruction(addr, instr); > > + err = __do_patch_memory(addr, val, is_dword); > > local_irq_restore(flags); > > > > return err; > > } > > -NOKPROBE_SYMBOL(patch_instruction); > > + > > +#ifdef CONFIG_PPC64 > > + > > +int patch_uint(void *addr, unsigned int val) > > +{ > > + return do_patch_memory(addr, val, false); > > +} > > +NOKPROBE_SYMBOL(patch_uint) > > + > > +int patch_ulong(void *addr, unsigned long val) > > +{ > > + return do_patch_memory(addr, val, true); > > +} > > +NOKPROBE_SYMBOL(patch_ulong) > > + > > +int patch_instruction(u32 *addr, ppc_inst_t instr) > > +{ > > + if (ppc_inst_prefixed(instr)) > > + return patch_ulong(addr, ppc_inst_as_ulong(instr)); > > + else > > + return patch_uint(addr, ppc_inst_val(instr)); > > +} > > +NOKPROBE_SYMBOL(patch_instruction) > > + > > +#else > > + > > +noinline int patch_uint(void *addr, unsigned int val) > > +{ > > + return do_patch_memory(addr, val, false); > > +} > > +NOKPROBE_SYMBOL(patch_uint) > > + > > +#endif > > > > int patch_branch(u32 *addr, unsigned long target, int flags) > > { >
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 3f881548fb61..170bfa848c7c 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -72,7 +72,36 @@ static inline int create_branch(ppc_inst_t *instr, const u32 *addr, int create_cond_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags); int patch_branch(u32 *addr, unsigned long target, int flags); + +/* patch_uint and patch_ulong must only be called on addresses where the patch + * does not cross a cacheline, otherwise it may not be flushed properly and + * mixes of new and stale data may be observed. + * + * patch_instruction and other instruction patchers automatically satisfy this + * requirement due to instruction alignment requirements. + */ + +int patch_uint(void *addr, unsigned int val); + +#ifdef CONFIG_PPC64 + +int patch_ulong(void *addr, unsigned long val); int patch_instruction(u32 *addr, ppc_inst_t instr); + +#else + +static inline int patch_ulong(void *addr, unsigned long val) +{ + return patch_uint(addr, val); +} + +static inline int patch_instruction(u32 *addr, ppc_inst_t instr) +{ + return patch_uint(addr, ppc_inst_val(instr)); +} + +#endif + int raw_patch_instruction(u32 *addr, ppc_inst_t instr); static inline unsigned long patch_site_addr(s32 *site) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 125c55e3e148..e41c0ccec79f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -15,20 +15,24 @@ #include <asm/code-patching.h> #include <asm/inst.h> -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) +static int __patch_memory(void *patch_addr, unsigned long val, void *exec_addr, + bool is_dword) { - if (!ppc_inst_prefixed(instr)) { - u32 val = ppc_inst_val(instr); - - __put_kernel_nofault(patch_addr, &val, u32, failed); - } else { - u64 val = ppc_inst_as_ulong(instr); + /* 
Prefixed instruction may cross cacheline if cacheline smaller than 64 bytes */ + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64); + if (IS_ENABLED(CONFIG_PPC64) && unlikely(is_dword)) __put_kernel_nofault(patch_addr, &val, u64, failed); - } + else + __put_kernel_nofault(patch_addr, &val, u32, failed); - asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), - "r" (exec_addr)); + /* Assume data is inside a single cacheline */ + dcbst(patch_addr); + mb(); /* sync */ + /* Flush on the EA that may be executed in case of a non-coherent icache */ + icbi(exec_addr); + mb(); /* sync */ + isync(); return 0; @@ -38,7 +42,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr int raw_patch_instruction(u32 *addr, ppc_inst_t instr) { - return __patch_instruction(addr, instr, addr); + if (ppc_inst_prefixed(instr)) + return __patch_memory(addr, ppc_inst_as_ulong(instr), addr, true); + else + return __patch_memory(addr, ppc_inst_val(instr), addr, false); } static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); @@ -149,7 +156,7 @@ static void unmap_patch_area(unsigned long addr) flush_tlb_kernel_range(addr, addr + PAGE_SIZE); } -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) +static int __do_patch_memory(void *addr, unsigned long val, bool is_dword) { int err; u32 *patch_addr; @@ -166,7 +173,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) if (radix_enabled()) asm volatile("ptesync": : :"memory"); - err = __patch_instruction(addr, instr, patch_addr); + err = __patch_memory(patch_addr, val, addr, is_dword); pte_clear(&init_mm, text_poke_addr, pte); flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); @@ -174,7 +181,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } -int patch_instruction(u32 *addr, ppc_inst_t instr) +static int do_patch_memory(void *addr, unsigned long val, bool is_dword) { int err; unsigned long flags; @@ -186,15 +193,47 
@@ int patch_instruction(u32 *addr, ppc_inst_t instr) */ if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || !static_branch_likely(&poking_init_done)) - return raw_patch_instruction(addr, instr); + return __patch_memory(addr, val, addr, is_dword); local_irq_save(flags); - err = __do_patch_instruction(addr, instr); + err = __do_patch_memory(addr, val, is_dword); local_irq_restore(flags); return err; } -NOKPROBE_SYMBOL(patch_instruction); + +#ifdef CONFIG_PPC64 + +int patch_uint(void *addr, unsigned int val) +{ + return do_patch_memory(addr, val, false); +} +NOKPROBE_SYMBOL(patch_uint) + +int patch_ulong(void *addr, unsigned long val) +{ + return do_patch_memory(addr, val, true); +} +NOKPROBE_SYMBOL(patch_ulong) + +int patch_instruction(u32 *addr, ppc_inst_t instr) +{ + if (ppc_inst_prefixed(instr)) + return patch_ulong(addr, ppc_inst_as_ulong(instr)); + else + return patch_uint(addr, ppc_inst_val(instr)); +} +NOKPROBE_SYMBOL(patch_instruction) + +#else + +noinline int patch_uint(void *addr, unsigned int val) +{ + return do_patch_memory(addr, val, false); +} +NOKPROBE_SYMBOL(patch_uint) + +#endif int patch_branch(u32 *addr, unsigned long target, int flags) {
Adds a generic memory patching mechanism for patches of size int or long bytes. The patch_instruction function is reimplemented in terms of this more generic function. This generic implementation allows patching of arbitrary long data, such as pointers on 64-bit. As a performance optimisation the patch will unconditionally flush the icache, as patch_instruction is used significantly more often and in more performance sensitive contexts (e.g., ftrace activation). On 32-bit patch_uint is marked noinline to prevent a mis-optimisation. Without noinline, inside patch_branch the compiler may inline all the way to do_patch_memory, preventing the compiler from inlining do_patch_memory into patch_uint. This would needlessly force patch_uint to be a branch to do_patch_memory. The 'IS_ENABLED(CONFIG_PPC64) && ' part of the patch size condition helps GCC inline __patch_memory properly. Only 64-bit requires doubleword writes anyway, as ints and longs are the same size on 32-bit. Signed-off-by: Benjamin Gray <bgray@linux.ibm.com> --- arch/powerpc/include/asm/code-patching.h | 29 ++++++++++ arch/powerpc/lib/code-patching.c | 73 ++++++++++++++++++------ 2 files changed, 85 insertions(+), 17 deletions(-)