Message ID | 20220926064316.765967-2-bgray@linux.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Out-of-line static calls for powerpc64 ELF V2 | expand |
Hi Benjamin, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on 3d7a198cfdb47405cfb4a3ea523876569fe341e6] url: https://github.com/intel-lab-lkp/linux/commits/Benjamin-Gray/Out-of-line-static-calls-for-powerpc64-ELF-V2/20220926-145009 base: 3d7a198cfdb47405cfb4a3ea523876569fe341e6 config: powerpc-allyesconfig compiler: powerpc-linux-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/7e7a5738456329ebbc24558228fb729ce5236f60 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Benjamin-Gray/Out-of-line-static-calls-for-powerpc64-ELF-V2/20220926-145009 git checkout 7e7a5738456329ebbc24558228fb729ce5236f60 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/lib/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): >> arch/powerpc/lib/code-patching.c:18:1: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration] 18 | static int __always_inline ___patch_memory(void *patch_addr, | ^~~~~~ vim +/inline +18 arch/powerpc/lib/code-patching.c 17 > 18 static int __always_inline ___patch_memory(void *patch_addr, 19 unsigned long data, 20 void *prog_addr, 21 size_t size) 22 { 23 switch (size) { 24 case 1: 25 __put_kernel_nofault(patch_addr, &data, u8, failed); 26 break; 27 case 2: 28 __put_kernel_nofault(patch_addr, &data, u16, failed); 29 break; 30 case 4: 31 __put_kernel_nofault(patch_addr, &data, u32, failed); 32 break; 33 #ifdef CONFIG_PPC64 34 case 8: 35 __put_kernel_nofault(patch_addr, &data, u64, failed); 36 break; 37 #endif 38 default: 39 unreachable(); 40 } 41 42 
dcbst(patch_addr); 43 dcbst(patch_addr + size - 1); /* Last byte of data may cross a cacheline */ 44 45 mb(); /* sync */ 46 47 /* Flush on the EA that may be executed in case of a non-coherent icache */ 48 icbi(prog_addr); 49 50 /* Also flush the last byte of the instruction if it may be a 51 * prefixed instruction and we aren't assuming minimum 64-byte 52 * cacheline sizes 53 */ 54 if (IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64) 55 icbi(prog_addr + size - 1); 56 57 mb(); /* sync */ 58 isync(); 59 60 return 0; 61 62 failed: 63 return -EPERM; 64 } 65
Hi Benjamin, Thank you for the patch! Yet something to improve: [auto build test ERROR on 3d7a198cfdb47405cfb4a3ea523876569fe341e6] url: https://github.com/intel-lab-lkp/linux/commits/Benjamin-Gray/Out-of-line-static-calls-for-powerpc64-ELF-V2/20220926-145009 base: 3d7a198cfdb47405cfb4a3ea523876569fe341e6 config: powerpc-allnoconfig compiler: powerpc-linux-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/7e7a5738456329ebbc24558228fb729ce5236f60 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Benjamin-Gray/Out-of-line-static-calls-for-powerpc64-ELF-V2/20220926-145009 git checkout 7e7a5738456329ebbc24558228fb729ce5236f60 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/lib/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): >> arch/powerpc/lib/code-patching.c:18:1: error: 'inline' is not at beginning of declaration [-Werror=old-style-declaration] 18 | static int __always_inline ___patch_memory(void *patch_addr, | ^~~~~~ cc1: all warnings being treated as errors vim +/inline +18 arch/powerpc/lib/code-patching.c 17 > 18 static int __always_inline ___patch_memory(void *patch_addr, 19 unsigned long data, 20 void *prog_addr, 21 size_t size) 22 { 23 switch (size) { 24 case 1: 25 __put_kernel_nofault(patch_addr, &data, u8, failed); 26 break; 27 case 2: 28 __put_kernel_nofault(patch_addr, &data, u16, failed); 29 break; 30 case 4: 31 __put_kernel_nofault(patch_addr, &data, u32, failed); 32 break; 33 #ifdef CONFIG_PPC64 34 case 8: 35 __put_kernel_nofault(patch_addr, &data, u64, failed); 36 break; 37 #endif 38 default: 
39 unreachable(); 40 } 41 42 dcbst(patch_addr); 43 dcbst(patch_addr + size - 1); /* Last byte of data may cross a cacheline */ 44 45 mb(); /* sync */ 46 47 /* Flush on the EA that may be executed in case of a non-coherent icache */ 48 icbi(prog_addr); 49 50 /* Also flush the last byte of the instruction if it may be a 51 * prefixed instruction and we aren't assuming minimum 64-byte 52 * cacheline sizes 53 */ 54 if (IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64) 55 icbi(prog_addr + size - 1); 56 57 mb(); /* sync */ 58 isync(); 59 60 return 0; 61 62 failed: 63 return -EPERM; 64 } 65
Hi, By the way my email address is not anymore @c-s.fr but @csgroup.eu although the former still works. Le 26/09/2022 à 08:43, Benjamin Gray a écrit : > Adds a generic text patching mechanism for patches of 1, 2, 4, or (64-bit) 8 > bytes. The patcher conditionally syncs the icache depending on if > the content will be executed (as opposed to, e.g., read-only data). > > The `patch_instruction` function is reimplemented in terms of this > more generic function. This generic implementation allows patching of > arbitrary 64-bit data, whereas the original `patch_instruction` decided > the size based on the 'instruction' opcode, so was not suitable for > arbitrary data. I get a lot better though still some slight degradation: I get approx 3% more time needed to activate and de-activate ftrace when STRICT_KERNEL_RWX is selected. I get a surprising result without STRICT_KERNEL_RWX. Activation is also 3% but the de-activation needs 25% more time. > > Signed-off-by: Benjamin Gray <bgray@linux.ibm.com> > --- > arch/powerpc/include/asm/code-patching.h | 7 ++ > arch/powerpc/lib/code-patching.c | 90 +++++++++++++++++------- > 2 files changed, 71 insertions(+), 26 deletions(-) > > diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h > index 1c6316ec4b74..15efd8ab22da 100644 > --- a/arch/powerpc/include/asm/code-patching.h > +++ b/arch/powerpc/include/asm/code-patching.h > @@ -76,6 +76,13 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr, > int patch_branch(u32 *addr, unsigned long target, int flags); > int patch_instruction(u32 *addr, ppc_inst_t instr); > int raw_patch_instruction(u32 *addr, ppc_inst_t instr); > +int __patch_memory(void *dest, unsigned long src, size_t size); > + > +#define patch_memory(addr, val) \ > +({ \ > + BUILD_BUG_ON(!__native_word(val)); \ > + __patch_memory(addr, (unsigned long) val, sizeof(val)); \ > +}) Can you do a static __always_inline function instead of a macro here ? 
> > static inline unsigned long patch_site_addr(s32 *site) > { > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c > index ad0cf3108dd0..9979380d55ef 100644 > --- a/arch/powerpc/lib/code-patching.c > +++ b/arch/powerpc/lib/code-patching.c > @@ -15,20 +15,47 @@ > #include <asm/code-patching.h> > #include <asm/inst.h> > > -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) > +static int __always_inline ___patch_memory(void *patch_addr, > + unsigned long data, > + void *prog_addr, > + size_t size) Is it really needed in the .c file ? I would expect GCC to take the right decision by itself. By the way, the __always_inline must immediately follow static. > { > - if (!ppc_inst_prefixed(instr)) { > - u32 val = ppc_inst_val(instr); > + switch (size) { > + case 1: > + __put_kernel_nofault(patch_addr, &data, u8, failed); > + break; > + case 2: > + __put_kernel_nofault(patch_addr, &data, u16, failed); > + break; > + case 4: > + __put_kernel_nofault(patch_addr, &data, u32, failed); > + break; > +#ifdef CONFIG_PPC64 > + case 8: > + __put_kernel_nofault(patch_addr, &data, u64, failed); > + break; > +#endif > + default: > + unreachable(); A BUILD_BUG() would be better here I think. > + } > > - __put_kernel_nofault(patch_addr, &val, u32, failed); > - } else { > - u64 val = ppc_inst_as_ulong(instr); > + dcbst(patch_addr); > + dcbst(patch_addr + size - 1); /* Last byte of data may cross a cacheline */ Or the second byte of data may cross a cacheline ... 
> > - __put_kernel_nofault(patch_addr, &val, u64, failed); > - } > + mb(); /* sync */ > + > + /* Flush on the EA that may be executed in case of a non-coherent icache */ > + icbi(prog_addr); > + > + /* Also flush the last byte of the instruction if it may be a > + * prefixed instruction and we aren't assuming minimum 64-byte > + * cacheline sizes > + */ > + if (IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64) > + icbi(prog_addr + size - 1); > > - asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), > - "r" (exec_addr)); > + mb(); /* sync */ > + isync(); > > return 0; > > @@ -38,7 +65,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr > > int raw_patch_instruction(u32 *addr, ppc_inst_t instr) > { > - return __patch_instruction(addr, instr, addr); > + if (ppc_inst_prefixed(instr)) > + return ___patch_memory(addr, ppc_inst_as_ulong(instr), addr, sizeof(u64)); > + else > + return ___patch_memory(addr, ppc_inst_val(instr), addr, sizeof(u32)); > } > > #ifdef CONFIG_STRICT_KERNEL_RWX > @@ -147,24 +177,22 @@ static void unmap_patch_area(unsigned long addr) > flush_tlb_kernel_range(addr, addr + PAGE_SIZE); > } > > -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int __always_inline __do_patch_memory(void *dest, unsigned long src, size_t size) > { Whaou, do we really want all this to be __always_inline ? Did you check the text size increase ? 
> int err; > u32 *patch_addr; > - unsigned long text_poke_addr; > pte_t *pte; > - unsigned long pfn = get_patch_pfn(addr); > - > - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; > - patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); > + unsigned long text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; > + unsigned long pfn = get_patch_pfn(dest); > > + patch_addr = (u32 *)(text_poke_addr + offset_in_page(dest)); > pte = virt_to_kpte(text_poke_addr); > __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); > /* See ptesync comment in radix__set_pte_at() */ > if (radix_enabled()) > asm volatile("ptesync": : :"memory"); > > - err = __patch_instruction(addr, instr, patch_addr); > + err = ___patch_memory(patch_addr, src, dest, size); > > pte_clear(&init_mm, text_poke_addr, pte); > flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); > @@ -172,7 +200,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > return err; > } > > -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int __always_inline do_patch_memory(void *dest, unsigned long src, size_t size) > { > int err; > unsigned long flags; > @@ -183,32 +211,42 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) > * to allow patching. 
We just do the plain old patching > */ > if (!static_branch_likely(&poking_init_done)) > - return raw_patch_instruction(addr, instr); > + return ___patch_memory(dest, src, dest, size); > > local_irq_save(flags); > - err = __do_patch_instruction(addr, instr); > + err = __do_patch_memory(dest, src, size); > local_irq_restore(flags); > > return err; > } > + > #else /* !CONFIG_STRICT_KERNEL_RWX */ > > -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int do_patch_memory(void *dest, unsigned long src, size_t size) > { > - return raw_patch_instruction(addr, instr); > + return ___patch_memory(dest, src, dest, size); > } > > #endif /* CONFIG_STRICT_KERNEL_RWX */ > > __ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free); > > -int patch_instruction(u32 *addr, ppc_inst_t instr) > +int __patch_memory(void *dest, unsigned long src, size_t size) > { > /* Make sure we aren't patching a freed init section */ > - if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4)) > + if (static_branch_likely(&init_mem_is_free) && init_section_contains(dest, 4)) > return 0; > > - return do_patch_instruction(addr, instr); > + return do_patch_memory(dest, src, size); > +} > +NOKPROBE_SYMBOL(__patch_memory); > + > +int patch_instruction(u32 *addr, ppc_inst_t instr) > +{ > + if (ppc_inst_prefixed(instr)) > + return patch_memory(addr, ppc_inst_as_ulong(instr)); > + else > + return patch_memory(addr, ppc_inst_val(instr)); > } > NOKPROBE_SYMBOL(patch_instruction); > > -- > 2.37.3
On Mon, 2022-09-26 at 14:33 +0000, Christophe Leroy wrote: > > +#define patch_memory(addr, val) \ > > +({ \ > > + BUILD_BUG_ON(!__native_word(val)); \ > > + __patch_memory(addr, (unsigned long) val, sizeof(val)); \ > > +}) > > Can you do a static __always_inline function instead of a macro here > ? I didn't before because it doesn't allow using the type as a parameter. I considered these forms patch_memory(addr, val, 8); patch_memory(addr, val, void*); patch_memory(addr, val); // size taken from val type And thought the third was the nicest to use. Though coming back to this, I hadn't considered patch_memory(addr, val, sizeof(void*)) which would still allow a type to decide the size, and not be a macro. I've got an example implementation further down that also addresses the size check issue. > > +static int __always_inline ___patch_memory(void *patch_addr, > > + unsigned long data, > > + void *prog_addr, > > + size_t size) > > Is it really needed in the .c file ? I would expect GCC to take the > right decision by itself. I thought it'd be better to always inline it given it's only used generically in do_patch_memory and __do_patch_memory, which both get inlined into __patch_memory. But it does end up generating two copies due to the different contexts it's called in, so probably not worth it. Removed for v3. (raw_patch_instruction gets an optimised inline of ___patch_memory either way) > A BUILD_BUG() would be better here I think. BUILD_BUG() as the default case always triggers though, I assume because the constant used for size is too far away. 
How about static __always_inline int patch_memory(void *addr, unsigned long val, size_t size) { int __patch_memory(void *dest, unsigned long src, size_t size); BUILD_BUG_ON_MSG(!(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long)), "Unsupported size for patch_memory"); return __patch_memory(addr, val, size); } Declaring the __patch_memory function inside of patch_memory enforces that you can't accidentally call __patch_memory without going through this or the *patch_instruction entry points (which hardcode the size). > > + } > > > > - __put_kernel_nofault(patch_addr, &val, u32, > > failed); > > - } else { > > - u64 val = ppc_inst_as_ulong(instr); > > + dcbst(patch_addr); > > + dcbst(patch_addr + size - 1); /* Last byte of data may > > cross a cacheline */ > > Or the second byte of data may cross a cacheline ... It might, but unless we are assuming data cachelines smaller than the native word size it will either be in the first or last byte's cacheline. Whereas the last byte might be in its own cacheline. As justification the comment's misleading though, how about reducing it to "data may cross a cacheline" and leaving the reason for flushing the last byte implicit? > > -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > > +static int __always_inline __do_patch_memory(void *dest, unsigned > > long src, size_t size) > > { > > Whaou, do we really want all this to be __always_inline ? Did you > check > the text size increase ? These ones are redundant because GCC will already inline them, they were just part of experimenting inlining ___patch_memory. Will remove for v3. The text size doesn't increase though because the call hierarchy is just a linear chain of __patch_memory -> do_patch_memory -> __do_patch_memory The entry point __patch_memory is not inlined.
Le 27/09/2022 à 04:57, Benjamin Gray a écrit : > On Mon, 2022-09-26 at 14:33 +0000, Christophe Leroy wrote: >>> +#define patch_memory(addr, val) \ >>> +({ \ >>> + BUILD_BUG_ON(!__native_word(val)); \ >>> + __patch_memory(addr, (unsigned long) val, sizeof(val)); \ >>> +}) >> >> Can you do a static __always_inline function instead of a macro here >> ? > > I didn't before because it doesn't allow using the type as a parameter. > I considered these forms > > patch_memory(addr, val, 8); > patch_memory(addr, val, void*); > patch_memory(addr, val); // size taken from val type > > And thought the third was the nicest to use. Though coming back to > this, I hadn't considered > > patch_memory(addr, val, sizeof(void*)) > > which would still allow a type to decide the size, and not be a macro. > I've got an example implementation further down that also addresses the > size check issue. Oh, I missed that you did automatic type sizing. Fair enough. However I think taking the type of the passed value is dangerous. See put_user(), it uses the size of the destination pointer, not the size of the input value. patch_memory doesn't seem to be used outside of code-patching.c, so I don't thing it is worth to worry about a nice looking API. Just make it simple and pass the size to the function. > >>> +static int __always_inline ___patch_memory(void *patch_addr, >>> + unsigned long data, >>> + void *prog_addr, >>> + size_t size) >> >> Is it really needed in the .c file ? I would expect GCC to take the >> right decision by itself. > > I thought it'd be better to always inline it given it's only used > generically in do_patch_memory and __do_patch_memory, which both get > inlined into __patch_memory. But it does end up generating two copies > due to the different contexts it's called in, so probably not worth it. > Removed for v3. > > (raw_patch_instruction gets an optimised inline of ___patch_memory > either way) > >> A BUILD_BUG() would be better here I think. 
> > BUILD_BUG() as the default case always triggers though, I assume > because the constant used for size is too far away. How about > > static __always_inline int patch_memory(void *addr, > unsigned long val, > size_t size) > { > int __patch_memory(void *dest, unsigned long src, size_t size); > > BUILD_BUG_ON_MSG(!(size == sizeof(char) || > size == sizeof(short) || > size == sizeof(int) || > size == sizeof(long)), > "Unsupported size for patch_memory"); > return __patch_memory(addr, val, size); > } > > Declaring the __patch_memory function inside of patch_memory enforces > that you can't accidentally call __patch_memory without going through > this or the *patch_instruction entry points (which hardcode the size). Aren't you making it more difficult than needed ? That's C, not C plus plus and we are not trying to help the user. All kernel developers know that as soon as they use a function that has a leading double underscore they will be on their own. And again, patch_memory() isn't used anywhere else, at least for the time being, so why worry about that ? > >>> + } >>> >>> - __put_kernel_nofault(patch_addr, &val, u32, >>> failed); >>> - } else { >>> - u64 val = ppc_inst_as_ulong(instr); >>> + dcbst(patch_addr); >>> + dcbst(patch_addr + size - 1); /* Last byte of data may >>> cross a cacheline */ >> >> Or the second byte of data may cross a cacheline ... > > It might, but unless we are assuming data cachelines smaller than the > native word size it will either be in the first or last byte's > cacheline. Whereas the last byte might be in it's own cacheline. > > As justification the comment's misleading though, how about reducing it > to "data may cross a cacheline" and leaving the reason for flushing the > last byte implicit? Yes that was my worry, a misleading comment. I think "data may cross a cacheline" is what we need as a comment. 
> >>> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) >>> +static int __always_inline __do_patch_memory(void *dest, unsigned >>> long src, size_t size) >>> { >> >> Whaou, do we really want all this to be __always_inline ? Did you >> check >> the text size increase ? > > These ones are redundant because GCC will already inline them, they > were just part of experimenting inlining ___patch_memory. Will remove > for v3. > > The text size doesn't increase though because the call hierarchy is > just a linear chain of > __patch_memory -> do_patch_memory -> __do_patch_memory Yes, I had in mind that all those would be inlined doing to all callers of patch_instruction() and patch_memory(), but of course it stays in code_patching.c so that's not a problem. > > The entry point __patch_memory is not inlined.
Le 26/09/2022 à 08:43, Benjamin Gray a écrit : > Adds a generic text patching mechanism for patches of 1, 2, 4, or (64-bit) 8 > bytes. The patcher conditionally syncs the icache depending on if > the content will be executed (as opposed to, e.g., read-only data). > > The `patch_instruction` function is reimplemented in terms of this > more generic function. This generic implementation allows patching of > arbitrary 64-bit data, whereas the original `patch_instruction` decided > the size based on the 'instruction' opcode, so was not suitable for > arbitrary data. > > Signed-off-by: Benjamin Gray <bgray@linux.ibm.com> > --- > arch/powerpc/include/asm/code-patching.h | 7 ++ > arch/powerpc/lib/code-patching.c | 90 +++++++++++++++++------- > 2 files changed, 71 insertions(+), 26 deletions(-) > > diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h > index 1c6316ec4b74..15efd8ab22da 100644 > --- a/arch/powerpc/include/asm/code-patching.h > +++ b/arch/powerpc/include/asm/code-patching.h > @@ -76,6 +76,13 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr, > int patch_branch(u32 *addr, unsigned long target, int flags); > int patch_instruction(u32 *addr, ppc_inst_t instr); > int raw_patch_instruction(u32 *addr, ppc_inst_t instr); > +int __patch_memory(void *dest, unsigned long src, size_t size); > + > +#define patch_memory(addr, val) \ > +({ \ > + BUILD_BUG_ON(!__native_word(val)); \ > + __patch_memory(addr, (unsigned long) val, sizeof(val)); \ > +}) > > static inline unsigned long patch_site_addr(s32 *site) > { > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c > index ad0cf3108dd0..9979380d55ef 100644 > --- a/arch/powerpc/lib/code-patching.c > +++ b/arch/powerpc/lib/code-patching.c > @@ -15,20 +15,47 @@ > #include <asm/code-patching.h> > #include <asm/inst.h> > > -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) > +static int __always_inline 
___patch_memory(void *patch_addr, > + unsigned long data, > + void *prog_addr, > + size_t size) Could you reduce the number of lines ? For instance: static int __always_inline ___patch_memory(void *patch_addr, unsigned long data, void *prog_addr, size_t size) Also, 3 underscodes starts to be a lot too much, can we be a bit more creative on the function name ? > { > - if (!ppc_inst_prefixed(instr)) { > - u32 val = ppc_inst_val(instr); > + switch (size) { > + case 1: > + __put_kernel_nofault(patch_addr, &data, u8, failed); > + break; > + case 2: > + __put_kernel_nofault(patch_addr, &data, u16, failed); > + break; > + case 4: > + __put_kernel_nofault(patch_addr, &data, u32, failed); > + break; > +#ifdef CONFIG_PPC64 > + case 8: > + __put_kernel_nofault(patch_addr, &data, u64, failed); > + break; > +#endif > + default: > + unreachable(); > + } > > - __put_kernel_nofault(patch_addr, &val, u32, failed); > - } else { > - u64 val = ppc_inst_as_ulong(instr); > + dcbst(patch_addr); > + dcbst(patch_addr + size - 1); /* Last byte of data may cross a cacheline */ > > - __put_kernel_nofault(patch_addr, &val, u64, failed); > - } > + mb(); /* sync */ > + > + /* Flush on the EA that may be executed in case of a non-coherent icache */ > + icbi(prog_addr); prog_addr is a misleading name ? Is that the address at which you program it ? Is that the address the programs runs at ? exec_addr was a lot more explicit as it clearly defines the address at which the code is executed. > + > + /* Also flush the last byte of the instruction if it may be a > + * prefixed instruction and we aren't assuming minimum 64-byte > + * cacheline sizes > + */ > + if (IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64) > + icbi(prog_addr + size - 1); This doesn't exist today. 
I'd rather have: BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64); > > - asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), > - "r" (exec_addr)); > + mb(); /* sync */ > + isync(); > > return 0; > > @@ -38,7 +65,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr > > int raw_patch_instruction(u32 *addr, ppc_inst_t instr) > { > - return __patch_instruction(addr, instr, addr); > + if (ppc_inst_prefixed(instr)) > + return ___patch_memory(addr, ppc_inst_as_ulong(instr), addr, sizeof(u64)); > + else > + return ___patch_memory(addr, ppc_inst_val(instr), addr, sizeof(u32)); > } > > #ifdef CONFIG_STRICT_KERNEL_RWX > @@ -147,24 +177,22 @@ static void unmap_patch_area(unsigned long addr) > flush_tlb_kernel_range(addr, addr + PAGE_SIZE); > } > > -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int __always_inline __do_patch_memory(void *dest, unsigned long src, size_t size) > { > int err; > u32 *patch_addr; > - unsigned long text_poke_addr; > pte_t *pte; > - unsigned long pfn = get_patch_pfn(addr); > - > - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; > - patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); > + unsigned long text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; > + unsigned long pfn = get_patch_pfn(dest); > > + patch_addr = (u32 *)(text_poke_addr + offset_in_page(dest)); Can we avoid this churn ? Ok, you want to change 'addr' to 'dest', can we leave everything else as is ? Previously, there was a clear splitting of the function: address preparation blank line MMU mapping blank line patching blank line MMU unmapping Now the function seems disorganised and is less readable. 
> pte = virt_to_kpte(text_poke_addr); > __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); > /* See ptesync comment in radix__set_pte_at() */ > if (radix_enabled()) > asm volatile("ptesync": : :"memory"); > > - err = __patch_instruction(addr, instr, patch_addr); > + err = ___patch_memory(patch_addr, src, dest, size); > > pte_clear(&init_mm, text_poke_addr, pte); > flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); > @@ -172,7 +200,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > return err; > } > > -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int __always_inline do_patch_memory(void *dest, unsigned long src, size_t size) > { > int err; > unsigned long flags; > @@ -183,32 +211,42 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) > * to allow patching. We just do the plain old patching > */ > if (!static_branch_likely(&poking_init_done)) > - return raw_patch_instruction(addr, instr); > + return ___patch_memory(dest, src, dest, size); > > local_irq_save(flags); > - err = __do_patch_instruction(addr, instr); > + err = __do_patch_memory(dest, src, size); > local_irq_restore(flags); > > return err; > } > + > #else /* !CONFIG_STRICT_KERNEL_RWX */ > > -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int do_patch_memory(void *dest, unsigned long src, size_t size) > { > - return raw_patch_instruction(addr, instr); > + return ___patch_memory(dest, src, dest, size); > } > > #endif /* CONFIG_STRICT_KERNEL_RWX */ > > __ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free); > > -int patch_instruction(u32 *addr, ppc_inst_t instr) > +int __patch_memory(void *dest, unsigned long src, size_t size) > { > /* Make sure we aren't patching a freed init section */ > - if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4)) > + if (static_branch_likely(&init_mem_is_free) && init_section_contains(dest, 4)) > return 0; > > - return 
do_patch_instruction(addr, instr); > + return do_patch_memory(dest, src, size); > +} > +NOKPROBE_SYMBOL(__patch_memory); > + > +int patch_instruction(u32 *addr, ppc_inst_t instr) > +{ > + if (ppc_inst_prefixed(instr)) > + return patch_memory(addr, ppc_inst_as_ulong(instr)); > + else > + return patch_memory(addr, ppc_inst_val(instr)); > } > NOKPROBE_SYMBOL(patch_instruction); > > -- > 2.37.3
Le 26/09/2022 à 08:43, Benjamin Gray a écrit : > Adds a generic text patching mechanism for patches of 1, 2, 4, or (64-bit) 8 > bytes. The patcher conditionally syncs the icache depending on if > the content will be executed (as opposed to, e.g., read-only data). > > The `patch_instruction` function is reimplemented in terms of this > more generic function. This generic implementation allows patching of > arbitrary 64-bit data, whereas the original `patch_instruction` decided > the size based on the 'instruction' opcode, so was not suitable for > arbitrary data. > > Signed-off-by: Benjamin Gray <bgray@linux.ibm.com> > --- > arch/powerpc/include/asm/code-patching.h | 7 ++ > arch/powerpc/lib/code-patching.c | 90 +++++++++++++++++------- > 2 files changed, 71 insertions(+), 26 deletions(-) > > diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h > index 1c6316ec4b74..15efd8ab22da 100644 > --- a/arch/powerpc/include/asm/code-patching.h > +++ b/arch/powerpc/include/asm/code-patching.h > @@ -76,6 +76,13 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr, > int patch_branch(u32 *addr, unsigned long target, int flags); > int patch_instruction(u32 *addr, ppc_inst_t instr); > int raw_patch_instruction(u32 *addr, ppc_inst_t instr); > +int __patch_memory(void *dest, unsigned long src, size_t size); > + > +#define patch_memory(addr, val) \ > +({ \ > + BUILD_BUG_ON(!__native_word(val)); \ > + __patch_memory(addr, (unsigned long) val, sizeof(val)); \ > +}) > > static inline unsigned long patch_site_addr(s32 *site) > { > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c > index ad0cf3108dd0..9979380d55ef 100644 > --- a/arch/powerpc/lib/code-patching.c > +++ b/arch/powerpc/lib/code-patching.c > @@ -15,20 +15,47 @@ > #include <asm/code-patching.h> > #include <asm/inst.h> > > -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) > +static int __always_inline 
___patch_memory(void *patch_addr, > + unsigned long data, > + void *prog_addr, > + size_t size) > { > - if (!ppc_inst_prefixed(instr)) { > - u32 val = ppc_inst_val(instr); > + switch (size) { > + case 1: > + __put_kernel_nofault(patch_addr, &data, u8, failed); > + break; > + case 2: > + __put_kernel_nofault(patch_addr, &data, u16, failed); > + break; > + case 4: > + __put_kernel_nofault(patch_addr, &data, u32, failed); > + break; > +#ifdef CONFIG_PPC64 > + case 8: > + __put_kernel_nofault(patch_addr, &data, u64, failed); > + break; > +#endif > + default: > + unreachable(); > + } > > - __put_kernel_nofault(patch_addr, &val, u32, failed); > - } else { > - u64 val = ppc_inst_as_ulong(instr); > + dcbst(patch_addr); > + dcbst(patch_addr + size - 1); /* Last byte of data may cross a cacheline */ Would it be possible to minimise the above ? What is patch_memory used for ? It is not meant to patch raw data, but things like code text or code data. The only realistic case I see where we could have a cache split is 8 bytes data. And even that, we should be able to take care of it. So, do we need that double dcbst at all ? Or at least do we need it for the 1,2,4 cases ? 
I was wrongly expecting it to be a single additional instruction, but in fact it is a few more: In raw_patch_instruction() it remains reasonable: 4: 7c 00 18 6c dcbst 0,r3 8: 39 23 00 03 addi r9,r3,3 c: 7c 00 48 6c dcbst 0,r9 But in patch_memory it is one more: 80: 7c 00 18 6c dcbst 0,r3 84: 38 a5 ff ff addi r5,r5,-1 88: 7c a3 2a 14 add r5,r3,r5 8c: 7c 00 28 6c dcbst 0,r5 > > - __put_kernel_nofault(patch_addr, &val, u64, failed); > - } > + mb(); /* sync */ > + > + /* Flush on the EA that may be executed in case of a non-coherent icache */ > + icbi(prog_addr); > + > + /* Also flush the last byte of the instruction if it may be a > + * prefixed instruction and we aren't assuming minimum 64-byte > + * cacheline sizes > + */ > + if (IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64) > + icbi(prog_addr + size - 1); > > - asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), > - "r" (exec_addr)); > + mb(); /* sync */ > + isync(); > > return 0; > > @@ -38,7 +65,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr > > int raw_patch_instruction(u32 *addr, ppc_inst_t instr) > { > - return __patch_instruction(addr, instr, addr); > + if (ppc_inst_prefixed(instr)) > + return ___patch_memory(addr, ppc_inst_as_ulong(instr), addr, sizeof(u64)); > + else > + return ___patch_memory(addr, ppc_inst_val(instr), addr, sizeof(u32)); > } > > #ifdef CONFIG_STRICT_KERNEL_RWX > @@ -147,24 +177,22 @@ static void unmap_patch_area(unsigned long addr) > flush_tlb_kernel_range(addr, addr + PAGE_SIZE); > } > > -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int __always_inline __do_patch_memory(void *dest, unsigned long src, size_t size) > { > int err; > u32 *patch_addr; > - unsigned long text_poke_addr; > pte_t *pte; > - unsigned long pfn = get_patch_pfn(addr); > - > - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; > - patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); > + 
unsigned long text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; > + unsigned long pfn = get_patch_pfn(dest); > > + patch_addr = (u32 *)(text_poke_addr + offset_in_page(dest)); > pte = virt_to_kpte(text_poke_addr); > __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); > /* See ptesync comment in radix__set_pte_at() */ > if (radix_enabled()) > asm volatile("ptesync": : :"memory"); > > - err = __patch_instruction(addr, instr, patch_addr); > + err = ___patch_memory(patch_addr, src, dest, size); > > pte_clear(&init_mm, text_poke_addr, pte); > flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); > @@ -172,7 +200,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > return err; > } > > -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int __always_inline do_patch_memory(void *dest, unsigned long src, size_t size) > { > int err; > unsigned long flags; > @@ -183,32 +211,42 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) > * to allow patching. 
We just do the plain old patching > */ > if (!static_branch_likely(&poking_init_done)) > - return raw_patch_instruction(addr, instr); > + return ___patch_memory(dest, src, dest, size); > > local_irq_save(flags); > - err = __do_patch_instruction(addr, instr); > + err = __do_patch_memory(dest, src, size); > local_irq_restore(flags); > > return err; > } > + > #else /* !CONFIG_STRICT_KERNEL_RWX */ > > -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int do_patch_memory(void *dest, unsigned long src, size_t size) > { > - return raw_patch_instruction(addr, instr); > + return ___patch_memory(dest, src, dest, size); > } > > #endif /* CONFIG_STRICT_KERNEL_RWX */ > > __ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free); > > -int patch_instruction(u32 *addr, ppc_inst_t instr) > +int __patch_memory(void *dest, unsigned long src, size_t size) > { > /* Make sure we aren't patching a freed init section */ > - if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4)) > + if (static_branch_likely(&init_mem_is_free) && init_section_contains(dest, 4)) > return 0; > > - return do_patch_instruction(addr, instr); > + return do_patch_memory(dest, src, size); > +} > +NOKPROBE_SYMBOL(__patch_memory); > + > +int patch_instruction(u32 *addr, ppc_inst_t instr) > +{ > + if (ppc_inst_prefixed(instr)) > + return patch_memory(addr, ppc_inst_as_ulong(instr)); > + else > + return patch_memory(addr, ppc_inst_val(instr)); > } > NOKPROBE_SYMBOL(patch_instruction); > > -- > 2.37.3
On Tue, 2022-09-27 at 06:40 +0000, Christophe Leroy wrote: > > + /* Flush on the EA that may be executed in case of a non- > > coherent icache */ > > + icbi(prog_addr); > > prog_addr is a misleading name ? Is that the address at which you > program it ? Is that the address the programs runs at ? > > exec_addr was a lot more explicit as it clearly defines the address > at > which the code is executed. I'm not sure what it could be confused for other than "the address the program uses" (be it uses for executing, or uses as data). I just called it that because it's not necessarily executed, so 'exec_addr' is misleading (to the extent it matters in the first place...). > > + if (IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64) > > + icbi(prog_addr + size - 1); > > This doesn't exist today. > > I'd rather have: > > BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < > 64); Sure, I can adjust the style. > > +static int __always_inline __do_patch_memory(void *dest, unsigned > > long src, size_t size) > > { > > int err; > > u32 *patch_addr; > > - unsigned long text_poke_addr; > > pte_t *pte; > > - unsigned long pfn = get_patch_pfn(addr); > > - > > - text_poke_addr = (unsigned > > long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; > > - patch_addr = (u32 *)(text_poke_addr + > > offset_in_page(addr)); > > + unsigned long text_poke_addr = (unsigned > > long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; > > + unsigned long pfn = get_patch_pfn(dest); > > > > + patch_addr = (u32 *)(text_poke_addr + > > offset_in_page(dest)); > > Can we avoid this churn ? > Ok, you want to change 'addr' to 'dest', can we leave everything else > as > is ? 'addr' was only renamed because the v1 used a pointer to the data, so 'addr' was ambiguous. I'll restore it to 'addr' for v3. I'll also restore the formatting.
Le 28/09/2022 à 03:30, Benjamin Gray a écrit : > On Tue, 2022-09-27 at 06:40 +0000, Christophe Leroy wrote: >>> + /* Flush on the EA that may be executed in case of a non- >>> coherent icache */ >>> + icbi(prog_addr); >> >> prog_addr is a misleading name ? Is that the address at which you >> program it ? Is that the address the programs runs at ? >> >> exec_addr was a lot more explicit as it clearly defines the address >> at >> which the code is executed. > > I'm not sure what it could be confused for other than "the address the > program uses" (be it uses for executing, or uses as data). I just > called it that because it's not necessarily executed, so 'exec_addr' is > misleading (to the extent it matters in the first place...). For me, at first look it means 'programming address', that is the address that is used to perform the programming ie the patching. Hence the confusion. Whereas exec_addr is explicitely for me the address at which the code is run. > >>> + if (IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64) >>> + icbi(prog_addr + size - 1); >> >> This doesn't exist today. >> >> I'd rather have: >> >> BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < >> 64); > > Sure, I can adjust the style. That's not just style ... > >>> +static int __always_inline __do_patch_memory(void *dest, unsigned >>> long src, size_t size) >>> { >>> int err; >>> u32 *patch_addr; >>> - unsigned long text_poke_addr; >>> pte_t *pte; >>> - unsigned long pfn = get_patch_pfn(addr); >>> - >>> - text_poke_addr = (unsigned >>> long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; >>> - patch_addr = (u32 *)(text_poke_addr + >>> offset_in_page(addr)); >>> + unsigned long text_poke_addr = (unsigned >>> long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; >>> + unsigned long pfn = get_patch_pfn(dest); >>> >>> + patch_addr = (u32 *)(text_poke_addr + >>> offset_in_page(dest)); >> >> Can we avoid this churn ? 
>> Ok, you want to change 'addr' to 'dest', can we leave everything else >> as >> is ? > > 'addr' was only renamed because the v1 used a pointer to the data, so > 'addr' was ambiguous. I'll restore it to 'addr' for v3. > > I'll also restore the formatting. Then maybe the 'src' should be renamed 'val' or something else. Christophe
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 1c6316ec4b74..15efd8ab22da 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -76,6 +76,13 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr, int patch_branch(u32 *addr, unsigned long target, int flags); int patch_instruction(u32 *addr, ppc_inst_t instr); int raw_patch_instruction(u32 *addr, ppc_inst_t instr); +int __patch_memory(void *dest, unsigned long src, size_t size); + +#define patch_memory(addr, val) \ +({ \ + BUILD_BUG_ON(!__native_word(val)); \ + __patch_memory(addr, (unsigned long) val, sizeof(val)); \ +}) static inline unsigned long patch_site_addr(s32 *site) { diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index ad0cf3108dd0..9979380d55ef 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -15,20 +15,47 @@ #include <asm/code-patching.h> #include <asm/inst.h> -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) +static int __always_inline ___patch_memory(void *patch_addr, + unsigned long data, + void *prog_addr, + size_t size) { - if (!ppc_inst_prefixed(instr)) { - u32 val = ppc_inst_val(instr); + switch (size) { + case 1: + __put_kernel_nofault(patch_addr, &data, u8, failed); + break; + case 2: + __put_kernel_nofault(patch_addr, &data, u16, failed); + break; + case 4: + __put_kernel_nofault(patch_addr, &data, u32, failed); + break; +#ifdef CONFIG_PPC64 + case 8: + __put_kernel_nofault(patch_addr, &data, u64, failed); + break; +#endif + default: + unreachable(); + } - __put_kernel_nofault(patch_addr, &val, u32, failed); - } else { - u64 val = ppc_inst_as_ulong(instr); + dcbst(patch_addr); + dcbst(patch_addr + size - 1); /* Last byte of data may cross a cacheline */ - __put_kernel_nofault(patch_addr, &val, u64, failed); - } + mb(); /* sync */ + + /* Flush on the EA that may be executed in 
case of a non-coherent icache */ + icbi(prog_addr); + + /* Also flush the last byte of the instruction if it may be a + * prefixed instruction and we aren't assuming minimum 64-byte + * cacheline sizes + */ + if (IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64) + icbi(prog_addr + size - 1); - asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), - "r" (exec_addr)); + mb(); /* sync */ + isync(); return 0; @@ -38,7 +65,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr int raw_patch_instruction(u32 *addr, ppc_inst_t instr) { - return __patch_instruction(addr, instr, addr); + if (ppc_inst_prefixed(instr)) + return ___patch_memory(addr, ppc_inst_as_ulong(instr), addr, sizeof(u64)); + else + return ___patch_memory(addr, ppc_inst_val(instr), addr, sizeof(u32)); } #ifdef CONFIG_STRICT_KERNEL_RWX @@ -147,24 +177,22 @@ static void unmap_patch_area(unsigned long addr) flush_tlb_kernel_range(addr, addr + PAGE_SIZE); } -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) +static int __always_inline __do_patch_memory(void *dest, unsigned long src, size_t size) { int err; u32 *patch_addr; - unsigned long text_poke_addr; pte_t *pte; - unsigned long pfn = get_patch_pfn(addr); - - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; - patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + unsigned long text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; + unsigned long pfn = get_patch_pfn(dest); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(dest)); pte = virt_to_kpte(text_poke_addr); __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); /* See ptesync comment in radix__set_pte_at() */ if (radix_enabled()) asm volatile("ptesync": : :"memory"); - err = __patch_instruction(addr, instr, patch_addr); + err = ___patch_memory(patch_addr, src, dest, size); pte_clear(&init_mm, text_poke_addr, pte); 
flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); @@ -172,7 +200,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) +static int __always_inline do_patch_memory(void *dest, unsigned long src, size_t size) { int err; unsigned long flags; @@ -183,32 +211,42 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) * to allow patching. We just do the plain old patching */ if (!static_branch_likely(&poking_init_done)) - return raw_patch_instruction(addr, instr); + return ___patch_memory(dest, src, dest, size); local_irq_save(flags); - err = __do_patch_instruction(addr, instr); + err = __do_patch_memory(dest, src, size); local_irq_restore(flags); return err; } + #else /* !CONFIG_STRICT_KERNEL_RWX */ -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) +static int do_patch_memory(void *dest, unsigned long src, size_t size) { - return raw_patch_instruction(addr, instr); + return ___patch_memory(dest, src, dest, size); } #endif /* CONFIG_STRICT_KERNEL_RWX */ __ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free); -int patch_instruction(u32 *addr, ppc_inst_t instr) +int __patch_memory(void *dest, unsigned long src, size_t size) { /* Make sure we aren't patching a freed init section */ - if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4)) + if (static_branch_likely(&init_mem_is_free) && init_section_contains(dest, 4)) return 0; - return do_patch_instruction(addr, instr); + return do_patch_memory(dest, src, size); +} +NOKPROBE_SYMBOL(__patch_memory); + +int patch_instruction(u32 *addr, ppc_inst_t instr) +{ + if (ppc_inst_prefixed(instr)) + return patch_memory(addr, ppc_inst_as_ulong(instr)); + else + return patch_memory(addr, ppc_inst_val(instr)); } NOKPROBE_SYMBOL(patch_instruction);
Add a generic text patching mechanism for patches of 1, 2, 4, or (64-bit) 8 bytes. The patcher conditionally syncs the icache depending on whether the content will be executed (as opposed to, e.g., read-only data). The `patch_instruction` function is reimplemented in terms of this more generic function. This generic implementation allows patching of arbitrary data up to 64 bits wide, whereas the original `patch_instruction` decided the size based on the 'instruction' opcode, so was not suitable for arbitrary data. Signed-off-by: Benjamin Gray <bgray@linux.ibm.com> --- arch/powerpc/include/asm/code-patching.h | 7 ++ arch/powerpc/lib/code-patching.c | 90 +++++++++++++++++------- 2 files changed, 71 insertions(+), 26 deletions(-) -- 2.37.3