Message ID: 20240415094820.399980-4-stefan.bader@canonical.com
State: New
Series: CVE-2024-2201 (v2)
On 15.04.24 11:48, Stefan Bader wrote:
> From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
>
> commit 7390db8aea0d64e9deb28b8e1ce716f5020c7ee5 upstream.
>
> Branch History Injection (BHI) attacks may allow a malicious application to
> influence indirect branch prediction in kernel by poisoning the branch
> history. eIBRS isolates indirect branch targets in ring0. The BHB can
> still influence the choice of indirect branch predictor entry, and although
> branch predictor entries are isolated between modes when eIBRS is enabled,
> the BHB itself is not isolated between modes.
>
> Alder Lake and newer processors support a hardware control BHI_DIS_S to
> mitigate BHI. For older processors Intel has released a software sequence
> to clear the branch history on parts that don't support BHI_DIS_S. Add
> support to execute the software sequence at syscall entry and VMexit to
> overwrite the branch history.
>
> For now, branch history is not cleared at interrupt entry, as malicious
> applications are not believed to have sufficient control over the
> registers, since previous register state is cleared at interrupt
> entry. Researchers continue to poke at this area and it may become
> necessary to clear at interrupt entry as well in the future.
>
> This mitigation is only defined here. It is enabled later.
>
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> Co-developed-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
> Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
> Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
> Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>
> CVE-2024-2201
> (cherry picked from commit eb36b0dce2138581bc6b5e39d0273cb4c96ded81 linux-6.6.y)
> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>  arch/x86/entry/common.c              |  4 +-
>  arch/x86/entry/entry_64.S            | 61 ++++++++++++++++++++++++++++
>  arch/x86/entry/entry_64_compat.S     | 16 ++++++++

Here I accidentally dropped

  arch/x86/include/asm/cpufeatures.h   |  3 +-

which adds X86_FEATURE_CLEAR_BHB_LOOP. I got this locally amended and am
doing a test compile again.

>  arch/x86/include/asm/nospec-branch.h | 12 ++++++
>  arch/x86/include/asm/syscall.h       |  1 +
>  arch/x86/kvm/vmx/vmenter.S           |  2 +
>  6 files changed, 94 insertions(+), 2 deletions(-)
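The amended hunk itself is not shown in this mail. Based on the upstream
commit it presumably defines the synthetic feature bit along these lines
(a sketch only; the exact word/bit position in the 6.6.y backport may
differ, and the hunk should be checked against what was actually applied):

  /* arch/x86/include/asm/cpufeatures.h -- sketch of the missing hunk */
  #define X86_FEATURE_CLEAR_BHB_LOOP	(21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */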
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index a60d19228890..e72dac092245 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -141,7 +141,7 @@ static __always_inline bool int80_is_external(void)
>  }
>
>  /**
> - * int80_emulation - 32-bit legacy syscall entry
> + * do_int80_emulation - 32-bit legacy syscall C entry from asm
>   *
>   * This entry point can be used by 32-bit and 64-bit programs to perform
>   * 32-bit system calls. Instances of INT $0x80 can be found inline in
> @@ -159,7 +159,7 @@ static __always_inline bool int80_is_external(void)
>   * eax: system call number
>   * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
>   */
> -DEFINE_IDTENTRY_RAW(int80_emulation)
> +__visible noinstr void do_int80_emulation(struct pt_regs *regs)
>  {
>  	int nr;
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 43606de22511..f61898385961 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
>  	/* clobbers %rax, make sure it is after saving the syscall nr */
>  	IBRS_ENTER
>  	UNTRAIN_RET
> +	CLEAR_BRANCH_HISTORY
>
>  	call	do_syscall_64		/* returns with IRQs disabled */
>
> @@ -1538,3 +1539,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
>  	call	make_task_dead
>  SYM_CODE_END(rewind_stack_and_make_dead)
>  .popsection
> +
> +/*
> + * This sequence executes branches in order to remove user branch information
> + * from the branch history tracker in the Branch Predictor, therefore removing
> + * user influence on subsequent BTB lookups.
> + *
> + * It should be used on parts prior to Alder Lake. Newer parts should use the
> + * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
> + * virtualized on newer hardware the VMM should protect against BHI attacks by
> + * setting BHI_DIS_S for the guests.
> + *
> + * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
> + * and not clearing the branch history. The call tree looks like:
> + *
> + * call 1
> + *  call 2
> + *   call 2
> + *    call 2
> + *     call 2
> + *      call 2
> + *      ret
> + *     ret
> + *    ret
> + *   ret
> + *  ret
> + * ret
> + *
> + * This means that the stack is non-constant and ORC can't unwind it with %rsp
> + * alone. Therefore we unconditionally set up the frame pointer, which allows
> + * ORC to unwind properly.
> + *
> + * The alignment is for performance and not for safety, and may be safely
> + * refactored in the future if needed.
> + */
> +SYM_FUNC_START(clear_bhb_loop)
> +	push	%rbp
> +	mov	%rsp, %rbp
> +	movl	$5, %ecx
> +	ANNOTATE_INTRA_FUNCTION_CALL
> +	call	1f
> +	jmp	5f
> +	.align 64, 0xcc
> +	ANNOTATE_INTRA_FUNCTION_CALL
> +1:	call	2f
> +	RET
> +	.align 64, 0xcc
> +2:	movl	$5, %eax
> +3:	jmp	4f
> +	nop
> +4:	sub	$1, %eax
> +	jnz	3b
> +	sub	$1, %ecx
> +	jnz	1b
> +	RET
> +5:	lfence
> +	pop	%rbp
> +	RET
> +SYM_FUNC_END(clear_bhb_loop)
> +EXPORT_SYMBOL_GPL(clear_bhb_loop)
> +STACK_FRAME_NON_STANDARD(clear_bhb_loop)
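As an aside for reviewers, the branch pattern above may be easier to follow
as a C-level model (purely illustrative, not part of the patch; note the
real code nests the five outer CALLs on the stack rather than looping flat,
which is exactly why the frame pointer setup is needed):

	/*
	 * Illustrative model of clear_bhb_loop. Each loop iteration
	 * stands for taken branches in the asm (call/jmp/jnz), which
	 * push new entries into the branch history buffer.
	 */
	static void bhb_inner(void)		/* label 2: in the asm */
	{
		int i;

		for (i = 5; i; i--)		/* 3:/4: jmp + jnz pair */
			;			/* two taken branches per pass */
	}

	static void clear_bhb_loop_model(void)
	{
		int i;

		for (i = 5; i; i--)		/* 1: call 2f, redone via jnz 1b */
			bhb_inner();
		/* 5: lfence, then pop %rbp and return in the real code */
	}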
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index 4e88f8438706..2819b8d5737f 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
>
>  	IBRS_ENTER
>  	UNTRAIN_RET
> +	CLEAR_BRANCH_HISTORY
>
>  	/*
>  	 * SYSENTER doesn't filter flags, so we need to clear NT and AC
> @@ -209,6 +210,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
>
>  	IBRS_ENTER
>  	UNTRAIN_RET
> +	CLEAR_BRANCH_HISTORY
>
>  	movq	%rsp, %rdi
>  	call	do_fast_syscall_32
> @@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
>  	ANNOTATE_NOENDBR
>  	int3
>  SYM_CODE_END(entry_SYSCALL_compat)
> +
> +/*
> + * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
> + * point to C routines, however since this is a system call interface the branch
> + * history needs to be scrubbed to protect against BHI attacks, and that
> + * scrubbing needs to take place in assembly code prior to entering any C
> + * routines.
> + */
> +SYM_CODE_START(int80_emulation)
> +	ANNOTATE_NOENDBR
> +	UNWIND_HINT_FUNC
> +	CLEAR_BRANCH_HISTORY
> +	jmp	do_int80_emulation
> +SYM_CODE_END(int80_emulation)
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index 0396458c201f..cbb337b792a5 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -329,6 +329,14 @@
>  #endif
>  .endm
>
> +#ifdef CONFIG_X86_64
> +.macro CLEAR_BRANCH_HISTORY
> +	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
> +.endm
> +#else
> +#define CLEAR_BRANCH_HISTORY
> +#endif
> +
>  #else /* __ASSEMBLY__ */
>
>  #define ANNOTATE_RETPOLINE_SAFE \
> @@ -359,6 +367,10 @@ extern void srso_alias_untrain_ret(void);
>  extern void entry_untrain_ret(void);
>  extern void entry_ibpb(void);
>
> +#ifdef CONFIG_X86_64
> +extern void clear_bhb_loop(void);
> +#endif
> +
>  extern void (*x86_return_thunk)(void);
>
>  #ifdef CONFIG_CALL_DEPTH_TRACKING
> diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
> index b1d2a731cf60..03bb950eba69 100644
> --- a/arch/x86/include/asm/syscall.h
> +++ b/arch/x86/include/asm/syscall.h
> @@ -125,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
>  }
>
>  void do_syscall_64(struct pt_regs *regs, int nr);
> +void do_int80_emulation(struct pt_regs *regs);
>
>  #endif /* CONFIG_X86_32 */
>
> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> index be275a0410a8..e75dff1d6f97 100644
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
> @@ -272,6 +272,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
>
>  	call vmx_spec_ctrl_restore_host
>
> +	CLEAR_BRANCH_HISTORY
> +
>  	/* Put return value in AX */
>  	mov	%_ASM_BX, %_ASM_AX
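One more note on the nospec-branch.h hunk: because CLEAR_BRANCH_HISTORY is
an ALTERNATIVE with an empty default, it assembles to padding NOPs and the
call to clear_bhb_loop is only patched in when X86_FEATURE_CLEAR_BHB_LOOP
is set, which this patch deliberately never does ("It is enabled later").
The later enablement patch in the series presumably just forces the
synthetic cap when the software sequence is chosen, roughly like this
(a sketch of the assumed enablement, not part of this patch):

	/* sketch: arming the SW sequence when BHI_DIS_S is unavailable */
	setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);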