Message ID | 37dd7289fc63ef7decdec43ee74fb242d1ac6571.1625816918.git.christophe.leroy@csgroup.eu (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [1/2] powerpc/32s: Do kuep_lock() and kuep_unlock() in assembly | expand |
Related | show |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch powerpc/merge (8b3ae5772c2771c21ffcda36203f8f507ebc6fc0) |
snowpatch_ozlabs/checkpatch | warning | total: 2 errors, 1 warnings, 0 checks, 239 lines checked |
snowpatch_ozlabs/needsstable | success | Patch has no Fixes tags |
Hi Christophe, I love your patch! Yet something to improve: [auto build test ERROR on powerpc/next] [also build test ERROR on next-20210709] [cannot apply to v5.13] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-32s-Do-kuep_lock-and-kuep_unlock-in-assembly/20210709-155049 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-randconfig-r032-20210709 (attached as .config) compiler: powerpc-linux-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/8f55557e42a793db98e4e60b7c65d47a6c5a4b62 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christophe-Leroy/powerpc-32s-Do-kuep_lock-and-kuep_unlock-in-assembly/20210709-155049 git checkout 8f55557e42a793db98e4e60b7c65d47a6c5a4b62 # save the attached .config to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/ If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): arch/powerpc/kernel/entry_32.S: Assembler messages: >> arch/powerpc/kernel/entry_32.S:110: Error: unrecognized opcode: `update_user_segments_by_4' >> arch/powerpc/kernel/entry_32.S:132: Error: unrecognized opcode: `update_user_segments_by_6' arch/powerpc/kernel/entry_32.S:300: Error: unrecognized opcode: `update_user_segments_by_6' vim +110 arch/powerpc/kernel/entry_32.S 85 86 .globl transfer_to_syscall 87 transfer_to_syscall: 88 stw r11, GPR1(r1) 89 stw r11, 0(r1) 90 mflr r12 91 stw r12, _LINK(r1) 92 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 93 rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ 94 #endif 95 lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ 96 SAVE_GPR(2, r1) 97 addi r12,r12,STACK_FRAME_REGS_MARKER@l 98 stw r9,_MSR(r1) 99 li r2, INTERRUPT_SYSCALL 100 stw r12,8(r1) 101 stw r2,_TRAP(r1) 102 SAVE_GPR(0, r1) 103 SAVE_4GPRS(3, r1) 104 SAVE_2GPRS(7, r1) 105 addi r2,r10,-THREAD 106 #ifdef CONFIG_PPC_KUEP 107 mfsr r9,0 108 rlwinm r9,r9,0,8,3 109 oris r9,r9,SR_NX@h > 110 update_user_segments_by_4 r9, r10, r11, r12 111 #endif 112 SAVE_NVGPRS(r1) 113 114 /* Calling convention has r9 = orig r0, r10 = regs */ 115 addi r10,r1,STACK_FRAME_OVERHEAD 116 mr r9,r0 117 bl system_call_exception 118 119 ret_from_syscall: 120 addi r4,r1,STACK_FRAME_OVERHEAD 121 li r5,0 122 bl syscall_exit_prepare 123 #ifdef CONFIG_PPC_47x 124 lis r4,icache_44x_need_flush@ha 125 lwz r5,icache_44x_need_flush@l(r4) 126 cmplwi cr0,r5,0 127 bne- 2f 128 #endif /* CONFIG_PPC_47x */ 129 #ifdef CONFIG_PPC_KUEP 130 mfsr r7,0 131 rlwinm r7,r7,0,8,2 > 132 update_user_segments_by_6 r7, r8, r9, r10, r11, r12 133 #endif 134 lwz r4,_LINK(r1) 135 lwz r5,_CCR(r1) 136 mtlr r4 137 lwz r7,_NIP(r1) 138 lwz r8,_MSR(r1) 139 cmpwi r3,0 140 lwz r3,GPR3(r1) 141 syscall_exit_finish: 142 mtspr SPRN_SRR0,r7 143 mtspr SPRN_SRR1,r8 144 145 bne 3f 146 mtcr r5 147 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 64201125a287..2e0e87cf7d7a 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -22,22 +22,6 @@ static __always_inline bool kuep_is_disabled(void) return !IS_ENABLED(CONFIG_PPC_KUEP) || static_branch_unlikely(&disable_kuep_key); } -static inline void kuep_lock(void) -{ - if (kuep_is_disabled()) - return; - - update_user_segments(mfsr(0) | SR_NX); -} - -static inline void kuep_unlock(void) -{ - if (kuep_is_disabled()) - return; - - update_user_segments(mfsr(0) & ~SR_NX); -} - #ifdef CONFIG_PPC_KUAP #include <linux/sched.h> diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index f5be185cbdf8..e6c90802de03 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -64,7 +64,103 @@ struct ppc_bat { #define SR_KP 0x20000000 /* User key */ #define SR_KS 0x40000000 /* Supervisor key */ -#ifndef __ASSEMBLY__ +#ifdef __ASSEMBLY__ + +#include <asm/asm-offsets.h> + +.macro uus_addi sr reg1 reg2 imm + .if NUM_USER_SEGMENTS > \sr + addi \reg1,\reg2,\imm + .endif +.endm + +.macro uus_mtsr sr reg1 + .if NUM_USER_SEGMENTS > \sr + mtsr \sr, \reg1 + .endif +.endm + +.macro update_user_segments_by_4 tmp1 tmp2 tmp3 tmp4 + uus_addi 1, \tmp2, \tmp1, 0x111 + uus_addi 2, \tmp3, \tmp1, 0x222 + uus_addi 3, \tmp4, \tmp1, 0x333 + + uus_mtsr 0, \tmp1 + uus_mtsr 1, \tmp2 + uus_mtsr 2, \tmp3 + uus_mtsr 3, \tmp4 + + uus_addi 4, \tmp1, \tmp1, 0x444 + uus_addi 5, \tmp2, \tmp2, 0x444 + uus_addi 6, \tmp3, \tmp3, 0x444 + uus_addi 7, \tmp4, \tmp4, 0x444 + + uus_mtsr 4, \tmp1 + uus_mtsr 5, \tmp2 + uus_mtsr 6, \tmp3 + uus_mtsr 7, \tmp4 + + uus_addi 8, \tmp1, \tmp1, 0x444 + uus_addi 9, \tmp2, \tmp2, 0x444 + uus_addi 10, \tmp3, \tmp3, 0x444 + uus_addi 11, \tmp4, \tmp4, 0x444 + + uus_mtsr 8, \tmp1 + uus_mtsr 9, \tmp2 + uus_mtsr 10, \tmp3 + uus_mtsr 11, \tmp4 + + uus_addi 12, \tmp1, \tmp1, 0x444 + uus_addi 13, \tmp2, \tmp2, 0x444 + uus_addi 14, \tmp3, \tmp3, 0x444 + uus_addi 15, \tmp4, \tmp4, 0x444 + + uus_mtsr 12, \tmp1 + uus_mtsr 13, \tmp2 + uus_mtsr 14, \tmp3 + uus_mtsr 15, \tmp4 +.endm + +.macro update_user_segments_by_6 tmp1 tmp2 tmp3 tmp4 tmp5 tmp6 + uus_addi 1, \tmp2, \tmp1, 0x111 + uus_addi 2, \tmp3, \tmp1, 0x222 + uus_addi 3, \tmp4, \tmp1, 0x333 + uus_addi 4, \tmp5, \tmp1, 0x444 + uus_addi 5, \tmp6, \tmp1, 0x555 + + uus_mtsr 0, \tmp1 + uus_mtsr 1, \tmp2 + uus_mtsr 2, \tmp3 + uus_mtsr 3, \tmp4 + uus_mtsr 4, \tmp5 + uus_mtsr 5, \tmp6 + + uus_addi 6, \tmp1, \tmp1, 0x666 + uus_addi 7, \tmp2, \tmp2, 0x666 + uus_addi 8, \tmp3, \tmp3, 0x666 + uus_addi 9, \tmp4, \tmp4, 0x666 + uus_addi 10, \tmp5, \tmp5, 0x666 + uus_addi 11, \tmp6, \tmp6, 0x666 + + uus_mtsr 6, \tmp1 + uus_mtsr 7, \tmp2 + uus_mtsr 8, \tmp3 + uus_mtsr 9, \tmp4 + uus_mtsr 10, \tmp5 + uus_mtsr 11, \tmp6 + + uus_addi 12, \tmp1, \tmp1, 0x666 + uus_addi 13, \tmp2, \tmp2, 0x666 + uus_addi 14, \tmp3, \tmp3, 0x666 + uus_addi 15, \tmp4, \tmp4, 0x666 + + uus_mtsr 12, \tmp1 + uus_mtsr 13, \tmp2 + uus_mtsr 14, \tmp3 + uus_mtsr 15, \tmp4 +.endm + +#else /* * This macro defines the mapping from contexts to VSIDs (virtual diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index d4bdf7d274ac..d4349ca37084 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -139,12 +139,10 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (!arch_irq_disabled_regs(regs)) trace_hardirqs_off(); - if (user_mode(regs)) { - kuep_lock(); + if (user_mode(regs)) account_cpu_user_entry(); - } else { + else kuap_save_and_lock(regs); - } #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 1df763002726..34ff86e3686e 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -38,11 +38,6 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ -#ifndef CONFIG_PPC_BOOK3S_32 -static inline void kuep_lock(void) { } -static inline void kuep_unlock(void) { } -#endif - #ifdef CONFIG_PPC_KUAP void setup_kuap(bool disabled); #else diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0273a1349006..84b51a387e95 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -52,6 +52,16 @@ #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) .globl prepare_transfer_to_handler prepare_transfer_to_handler: +#ifdef CONFIG_PPC_KUEP + beq 1f + + mfsr r4,0 + rlwinm r4,r4,0,8,3 + oris r4,r4,SR_NX@h + update_user_segments_by_6 r4, r5, r6, r7, r8, r9 + blr +1: +#endif /* if from kernel, check interrupted DOZE/NAP mode */ lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 @@ -93,6 +103,12 @@ transfer_to_syscall: SAVE_4GPRS(3, r1) SAVE_2GPRS(7, r1) addi r2,r10,-THREAD +#ifdef CONFIG_PPC_KUEP + mfsr r9,0 + rlwinm r9,r9,0,8,3 + oris r9,r9,SR_NX@h + update_user_segments_by_4 r9, r10, r11, r12 +#endif SAVE_NVGPRS(r1) /* Calling convention has r9 = orig r0, r10 = regs */ @@ -110,6 +126,11 @@ ret_from_syscall: cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ +#ifdef CONFIG_PPC_KUEP + mfsr r7,0 + rlwinm r7,r7,0,8,2 + update_user_segments_by_6 r7, r8, r9, r10, r11, r12 +#endif lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -273,6 +294,11 @@ interrupt_return: beq .Lkernel_interrupt_return bl interrupt_exit_user_prepare cmpwi r3,0 +#ifdef CONFIG_PPC_KUEP + mfsr r7,0 + rlwinm r7,r7,0,8,2 + update_user_segments_by_6 r7, r8, r9, r10, r11, r12 +#endif bne- .Lrestore_nvgprs .Lfast_user_interrupt_return: diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 6b1ec9e3541b..4df20fa73504 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -134,7 +134,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) .macro prepare_transfer_to_handler #ifdef CONFIG_PPC_BOOK3S_32 andi. r12,r9,MSR_PR +#ifndef CONFIG_PPC_KUEP bne 777f +#endif bl prepare_transfer_to_handler 777: #endif diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 21bbd615ca41..cd6139003776 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -81,8 +81,6 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; - kuep_lock(); - regs->orig_gpr3 = r3; if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) @@ -365,7 +363,6 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) /* Restore user access locks last */ kuap_user_restore(regs); - kuep_unlock(); return ret; } diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c index c20733d6e02c..45c9967f9aef 100644 --- a/arch/powerpc/mm/book3s32/kuep.c +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -7,8 +7,13 @@ struct static_key_false disable_kuep_key; void setup_kuep(bool disabled) { + if (disabled) { + pr_info("KUEP cannot be disabled for the time being\n"); + disabled = false; + } + if (!disabled) - kuep_lock(); + update_user_segments(mfsr(0) | SR_NX); if (smp_processor_id() != boot_cpuid) return;
When interrupt and syscall entries where converted to C, KUEP locking and unlocking was also converted. It improved performance by unrolling the loop, and allowed easily implementing boot time deactivation of KUEP. However, null_syscall selftest shows that KUEP is still heavy (361 cycles with KUEP, 210 cycles without). A way to improve more is to group 'mtsr's together, instead of repeating 'addi' + 'mtsr' several times. In order to do that, more registers need to be available. In C, GCC will always be able to provide the requested number of registers, but at the cost of saving some data on the stack, which is counter performant here. So let's do it in assembly, when we have full control of which register can be used. It also has the advantage of locking earlier and unlocking later and it helps GCC generating less tricky code. The only drawback is to make boot time deactivation less straight forward and require 'hand' instruction patching. In syscall entry, there are only 4 registers availables, so group 'mtsr's by 4. In interrupt entry, syscall exit and interrupt exist, we can group by 6, which means 2 groups for a typical config with 12 user segments. With this change, null_syscall selftest reports 334 cycles. Without the change it was 361 cycles, that's an 7% reduction. For the time being, capability to deactive at boot time is disabled. It will be re-enabled in following patch. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> --- arch/powerpc/include/asm/book3s/32/kup.h | 16 --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 98 ++++++++++++++++++- arch/powerpc/include/asm/interrupt.h | 6 +- arch/powerpc/include/asm/kup.h | 5 - arch/powerpc/kernel/entry_32.S | 26 +++++ arch/powerpc/kernel/head_32.h | 2 + arch/powerpc/kernel/interrupt.c | 3 - arch/powerpc/mm/book3s32/kuep.c | 7 +- 8 files changed, 133 insertions(+), 30 deletions(-)