Message ID | 20231016050147.115686-1-bgray@linux.ibm.com (mailing list archive) |
---|---|
Headers | show |
Series | Add generic data patching functions | expand |
Le 16/10/2023 à 07:01, Benjamin Gray a écrit : > Currently patch_instruction() bases the write length on the value being > written. If the value looks like a prefixed instruction it writes 8 bytes, > otherwise it writes 4 bytes. This makes it potentially buggy to use for > writing arbitrary data, as if you want to write 4 bytes but it decides to > write 8 bytes it may clobber the following memory or be unaligned and > trigger an oops if it tries to cross a page boundary. > > To solve this, this series pulls out the size parameter to the 'top' of > the text patching logic, and propagates it through the various functions. > > The two sizes supported are int and long; this allows for patching > instructions and pointers on both ppc32 and ppc64. On ppc32 these are the > same size, so care is taken to only use the size parameter on static > functions, so the compiler can optimise it out entirely. Unfortunately > GCC trips over its own feet here and won't optimise in a way that is > optimal for strict RWX (mpc85xx_smp_defconfig) and no RWX > (pmac32_defconfig). > > In the first case, patch_memory() is very large and can only be inlined > if a single function calls it. While the source only calls it in > patch_instruction(), an earlier optimisation pass inlined > patch_instruction() into patch_branch(), so now there are 'two' references > to patch_memory() and it cannot be inlined into patch_instruction(). > Instead patch_instruction() becomes a single branch directly to > patch_memory(). > > We can fix this by marking patch_instruction() as noinline, but this > prevents patch_memory() from being directly inlined into patch_branch() > when RWX is disabled and patch_memory() is very small. > > It may be possible to avoid this by merging together patch_instruction() > and patch_memory() on ppc32, but the only way I can think to do this > without duplicating the implementation involves using the preprocessor > to change if is_dword is a parameter or a local variable, which is gross. What about: diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 7c6056bb1706..af89ef450c93 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -72,7 +72,7 @@ static inline int create_branch(ppc_inst_t *instr, const u32 *addr, int create_cond_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags); int patch_branch(u32 *addr, unsigned long target, int flags); -int patch_instruction(u32 *addr, ppc_inst_t instr); +int patch_memory(void *addr, unsigned long val, bool is_dword); int raw_patch_instruction(u32 *addr, ppc_inst_t instr); /* @@ -87,24 +87,28 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr); #ifdef CONFIG_PPC64 -int patch_uint(void *addr, unsigned int val); -int patch_ulong(void *addr, unsigned long val); +int patch_instruction(u32 *addr, ppc_inst_t instr); #define patch_u64 patch_ulong #else -static inline int patch_uint(u32 *addr, unsigned int val) +static inline int patch_instruction(u32 *addr, ppc_inst_t instr) { - return patch_instruction(addr, ppc_inst(val)); + return patch_memory(addr, ppc_inst_val(instr), false); } +#endif + static inline int patch_ulong(void *addr, unsigned long val) { - return patch_instruction(addr, ppc_inst(val)); + return patch_memory(addr, val, true); } -#endif +static inline int patch_uint(void *addr, unsigned int val) +{ + return patch_memory(addr, val, false); +} #define patch_u32 patch_uint diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 60289332412f..77418b2a4aa4 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -355,7 +355,7 @@ static int __do_patch_memory(void *addr, unsigned long val, bool is_dword) return err; } -static int patch_memory(void *addr, unsigned long val, bool is_dword) +int patch_memory(void *addr, unsigned long val, bool is_dword) { int err; unsigned long flags; @@ -378,6 +378,7 @@ static int patch_memory(void *addr, unsigned long val, bool is_dword) return err; } +NOKPROBE_SYMBOL(patch_memory) #ifdef CONFIG_PPC64 @@ -390,26 +391,6 @@ int patch_instruction(u32 *addr, ppc_inst_t instr) } NOKPROBE_SYMBOL(patch_instruction) -int patch_uint(void *addr, unsigned int val) -{ - return patch_memory(addr, val, false); -} -NOKPROBE_SYMBOL(patch_uint) - -int patch_ulong(void *addr, unsigned long val) -{ - return patch_memory(addr, val, true); -} -NOKPROBE_SYMBOL(patch_ulong) - -#else - -int patch_instruction(u32 *addr, ppc_inst_t instr) -{ - return patch_memory(addr, ppc_inst_val(instr), false); -} -NOKPROBE_SYMBOL(patch_instruction) - #endif int patch_branch(u32 *addr, unsigned long target, int flags) > > For now I've removed the noinline, because at least the compiler might > get smarter in future and do the inlines correctly. If noinline remains > then there is no chance of it working. > > Changes from v1: > * Addressed the v1 review actions > * Removed noinline (for now) > > v1: https://patchwork.ozlabs.org/project/linuxppc-dev/cover/20230207015643.590684-1-bgray@linux.ibm.com/ > > Benjamin Gray (3): > powerpc/code-patching: Add generic memory patching > powerpc/64: Convert patch_instruction() to patch_u32() > powerpc/32: Convert patch_instruction() to patch_uint() > > arch/powerpc/include/asm/code-patching.h | 33 ++++++++++++ > arch/powerpc/kernel/module_64.c | 5 +- > arch/powerpc/kernel/static_call.c | 2 +- > arch/powerpc/lib/code-patching.c | 66 ++++++++++++++++++------ > arch/powerpc/platforms/powermac/smp.c | 2 +- > 5 files changed, 87 insertions(+), 21 deletions(-) >
On 17/10/23 5:39 pm, Christophe Leroy wrote: > Le 16/10/2023 à 07:01, Benjamin Gray a écrit : >> Currently patch_instruction() bases the write length on the value being >> written. If the value looks like a prefixed instruction it writes 8 bytes, >> otherwise it writes 4 bytes. This makes it potentially buggy to use for >> writing arbitrary data, as if you want to write 4 bytes but it decides to >> write 8 bytes it may clobber the following memory or be unaligned and >> trigger an oops if it tries to cross a page boundary. >> >> To solve this, this series pulls out the size parameter to the 'top' of >> the text patching logic, and propagates it through the various functions. >> >> The two sizes supported are int and long; this allows for patching >> instructions and pointers on both ppc32 and ppc64. On ppc32 these are the >> same size, so care is taken to only use the size parameter on static >> functions, so the compiler can optimise it out entirely. Unfortunately >> GCC trips over its own feet here and won't optimise in a way that is >> optimal for strict RWX (mpc85xx_smp_defconfig) and no RWX >> (pmac32_defconfig). >> >> In the first case, patch_memory() is very large and can only be inlined >> if a single function calls it. While the source only calls it in >> patch_instruction(), an earlier optimisation pass inlined >> patch_instruction() into patch_branch(), so now there are 'two' references >> to patch_memory() and it cannot be inlined into patch_instruction(). >> Instead patch_instruction() becomes a single branch directly to >> patch_memory(). >> >> We can fix this by marking patch_instruction() as noinline, but this >> prevents patch_memory() from being directly inlined into patch_branch() >> when RWX is disabled and patch_memory() is very small. >> >> It may be possible to avoid this by merging together patch_instruction() >> and patch_memory() on ppc32, but the only way I can think to do this >> without duplicating the implementation involves using the preprocessor >> to change if is_dword is a parameter or a local variable, which is gross. > > What about: > > diff --git a/arch/powerpc/include/asm/code-patching.h > b/arch/powerpc/include/asm/code-patching.h > index 7c6056bb1706..af89ef450c93 100644 > --- a/arch/powerpc/include/asm/code-patching.h > +++ b/arch/powerpc/include/asm/code-patching.h > @@ -72,7 +72,7 @@ static inline int create_branch(ppc_inst_t *instr, > const u32 *addr, > int create_cond_branch(ppc_inst_t *instr, const u32 *addr, > unsigned long target, int flags); > int patch_branch(u32 *addr, unsigned long target, int flags); > -int patch_instruction(u32 *addr, ppc_inst_t instr); > +int patch_memory(void *addr, unsigned long val, bool is_dword); > int raw_patch_instruction(u32 *addr, ppc_inst_t instr); > > /* > @@ -87,24 +87,28 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr); > > #ifdef CONFIG_PPC64 > > -int patch_uint(void *addr, unsigned int val); > -int patch_ulong(void *addr, unsigned long val); > +int patch_instruction(u32 *addr, ppc_inst_t instr); > > #define patch_u64 patch_ulong > > #else > > -static inline int patch_uint(u32 *addr, unsigned int val) > +static inline int patch_instruction(u32 *addr, ppc_inst_t instr) > { > - return patch_instruction(addr, ppc_inst(val)); > + return patch_memory(addr, ppc_inst_val(instr), false); > } > > +#endif > + > static inline int patch_ulong(void *addr, unsigned long val) > { > - return patch_instruction(addr, ppc_inst(val)); > + return patch_memory(addr, val, true); > } > > -#endif > +static inline int patch_uint(void *addr, unsigned int val) > +{ > + return patch_memory(addr, val, false); > +} > > #define patch_u32 patch_uint > > diff --git a/arch/powerpc/lib/code-patching.c > b/arch/powerpc/lib/code-patching.c > index 60289332412f..77418b2a4aa4 100644 > --- a/arch/powerpc/lib/code-patching.c > +++ b/arch/powerpc/lib/code-patching.c > @@ -355,7 +355,7 @@ static int __do_patch_memory(void *addr, unsigned > long val, bool is_dword) > return err; > } > > -static int patch_memory(void *addr, unsigned long val, bool is_dword) > +int patch_memory(void *addr, unsigned long val, bool is_dword) > { > int err; > unsigned long flags; > @@ -378,6 +378,7 @@ static int patch_memory(void *addr, unsigned long > val, bool is_dword) > > return err; > } > +NOKPROBE_SYMBOL(patch_memory) > > #ifdef CONFIG_PPC64 > > @@ -390,26 +391,6 @@ int patch_instruction(u32 *addr, ppc_inst_t instr) > } > NOKPROBE_SYMBOL(patch_instruction) > > -int patch_uint(void *addr, unsigned int val) > -{ > - return patch_memory(addr, val, false); > -} > -NOKPROBE_SYMBOL(patch_uint) > - > -int patch_ulong(void *addr, unsigned long val) > -{ > - return patch_memory(addr, val, true); > -} > -NOKPROBE_SYMBOL(patch_ulong) > - > -#else > - > -int patch_instruction(u32 *addr, ppc_inst_t instr) > -{ > - return patch_memory(addr, ppc_inst_val(instr), false); > -} > -NOKPROBE_SYMBOL(patch_instruction) > - > #endif > > int patch_branch(u32 *addr, unsigned long target, int flags) > Wouldn't every caller need to initialise the is_dword parameter in that case? It can't tell it's unused across a translation unit boundary without LTO.
Le 17/10/2023 à 08:56, Benjamin Gray a écrit : > On 17/10/23 5:39 pm, Christophe Leroy wrote: >> Le 16/10/2023 à 07:01, Benjamin Gray a écrit : >>> Currently patch_instruction() bases the write length on the value being >>> written. If the value looks like a prefixed instruction it writes 8 >>> bytes, >>> otherwise it writes 4 bytes. This makes it potentially buggy to use for >>> writing arbitrary data, as if you want to write 4 bytes but it >>> decides to >>> write 8 bytes it may clobber the following memory or be unaligned and >>> trigger an oops if it tries to cross a page boundary. >>> >>> To solve this, this series pulls out the size parameter to the 'top' of >>> the text patching logic, and propagates it through the various >>> functions. >>> >>> The two sizes supported are int and long; this allows for patching >>> instructions and pointers on both ppc32 and ppc64. On ppc32 these are >>> the >>> same size, so care is taken to only use the size parameter on static >>> functions, so the compiler can optimise it out entirely. Unfortunately >>> GCC trips over its own feet here and won't optimise in a way that is >>> optimal for strict RWX (mpc85xx_smp_defconfig) and no RWX >>> (pmac32_defconfig). >>> >>> In the first case, patch_memory() is very large and can only be inlined >>> if a single function calls it. While the source only calls it in >>> patch_instruction(), an earlier optimisation pass inlined >>> patch_instruction() into patch_branch(), so now there are 'two' >>> references >>> to patch_memory() and it cannot be inlined into patch_instruction(). >>> Instead patch_instruction() becomes a single branch directly to >>> patch_memory(). >>> >>> We can fix this by marking patch_instruction() as noinline, but this >>> prevents patch_memory() from being directly inlined into patch_branch() >>> when RWX is disabled and patch_memory() is very small. >>> >>> It may be possible to avoid this by merging together patch_instruction() >>> and patch_memory() on ppc32, but the only way I can think to do this >>> without duplicating the implementation involves using the preprocessor >>> to change if is_dword is a parameter or a local variable, which is >>> gross. >> >> What about: >> >> diff --git a/arch/powerpc/include/asm/code-patching.h >> b/arch/powerpc/include/asm/code-patching.h >> index 7c6056bb1706..af89ef450c93 100644 >> --- a/arch/powerpc/include/asm/code-patching.h >> +++ b/arch/powerpc/include/asm/code-patching.h >> @@ -72,7 +72,7 @@ static inline int create_branch(ppc_inst_t *instr, >> const u32 *addr, >> int create_cond_branch(ppc_inst_t *instr, const u32 *addr, >> unsigned long target, int flags); >> int patch_branch(u32 *addr, unsigned long target, int flags); >> -int patch_instruction(u32 *addr, ppc_inst_t instr); >> +int patch_memory(void *addr, unsigned long val, bool is_dword); >> int raw_patch_instruction(u32 *addr, ppc_inst_t instr); >> >> /* >> @@ -87,24 +87,28 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t >> instr); >> >> #ifdef CONFIG_PPC64 >> >> -int patch_uint(void *addr, unsigned int val); >> -int patch_ulong(void *addr, unsigned long val); >> +int patch_instruction(u32 *addr, ppc_inst_t instr); >> >> #define patch_u64 patch_ulong >> >> #else >> >> -static inline int patch_uint(u32 *addr, unsigned int val) >> +static inline int patch_instruction(u32 *addr, ppc_inst_t instr) >> { >> - return patch_instruction(addr, ppc_inst(val)); >> + return patch_memory(addr, ppc_inst_val(instr), false); >> } >> >> +#endif >> + >> static inline int patch_ulong(void *addr, unsigned long val) >> { >> - return patch_instruction(addr, ppc_inst(val)); >> + return patch_memory(addr, val, true); >> } >> >> -#endif >> +static inline int patch_uint(void *addr, unsigned int val) >> +{ >> + return patch_memory(addr, val, false); >> +} >> >> #define patch_u32 patch_uint >> >> diff --git a/arch/powerpc/lib/code-patching.c >> b/arch/powerpc/lib/code-patching.c >> index 60289332412f..77418b2a4aa4 100644 >> --- a/arch/powerpc/lib/code-patching.c >> +++ b/arch/powerpc/lib/code-patching.c >> @@ -355,7 +355,7 @@ static int __do_patch_memory(void *addr, unsigned >> long val, bool is_dword) >> return err; >> } >> >> -static int patch_memory(void *addr, unsigned long val, bool is_dword) >> +int patch_memory(void *addr, unsigned long val, bool is_dword) >> { >> int err; >> unsigned long flags; >> @@ -378,6 +378,7 @@ static int patch_memory(void *addr, unsigned long >> val, bool is_dword) >> >> return err; >> } >> +NOKPROBE_SYMBOL(patch_memory) >> >> #ifdef CONFIG_PPC64 >> >> @@ -390,26 +391,6 @@ int patch_instruction(u32 *addr, ppc_inst_t instr) >> } >> NOKPROBE_SYMBOL(patch_instruction) >> >> -int patch_uint(void *addr, unsigned int val) >> -{ >> - return patch_memory(addr, val, false); >> -} >> -NOKPROBE_SYMBOL(patch_uint) >> - >> -int patch_ulong(void *addr, unsigned long val) >> -{ >> - return patch_memory(addr, val, true); >> -} >> -NOKPROBE_SYMBOL(patch_ulong) >> - >> -#else >> - >> -int patch_instruction(u32 *addr, ppc_inst_t instr) >> -{ >> - return patch_memory(addr, ppc_inst_val(instr), false); >> -} >> -NOKPROBE_SYMBOL(patch_instruction) >> - >> #endif >> >> int patch_branch(u32 *addr, unsigned long target, int flags) >> > > Wouldn't every caller need to initialise the is_dword parameter in that > case? It can't tell it's unused across a translation unit boundary > without LTO. > Ah yes you are right. Christophe