[04/15] x86/bhi: Add support for clearing branch history at syscall entry

Message ID 20240517075728.9722-5-yuxuan.luo@canonical.com
State: New
Series: CVE-2024-2201

Commit Message

Yuxuan Luo May 17, 2024, 7:57 a.m. UTC
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>

commit 7390db8aea0d64e9deb28b8e1ce716f5020c7ee5 upstream.

Branch History Injection (BHI) attacks may allow a malicious application to
influence indirect branch prediction in the kernel by poisoning the branch
history. eIBRS isolates indirect branch targets in ring0.  The BHB can
still influence the choice of indirect branch predictor entry, and although
branch predictor entries are isolated between modes when eIBRS is enabled,
the BHB itself is not isolated between modes.

Alder Lake and newer processors support a hardware control, BHI_DIS_S, to
mitigate BHI.  For older parts that don't support BHI_DIS_S, Intel has
released a software sequence that clears the branch history. Add support
to execute this software sequence at syscall entry and VMexit to
overwrite the branch history.
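
For orientation only (none of this is in the backport): on parts that
enumerate the hardware control, the upstream series sets the BHI_DIS_S bit
in MSR_IA32_SPEC_CTRL. A minimal sketch follows; the bit position is taken
from the upstream definitions, the helper name is hypothetical, and
x86_spec_ctrl_base is the kernel's cached SPEC_CTRL base value:

	/*
	 * Hedged sketch, not part of this patch: enabling the hardware
	 * control. SPEC_CTRL_BHI_DIS_S as bit 10 follows the upstream BHI
	 * series; verify against msr-index.h before relying on it.
	 */
	#define SPEC_CTRL_BHI_DIS_S	BIT(10)

	static void bhi_enable_hw_control(void)	/* hypothetical helper */
	{
		u64 val = x86_spec_ctrl_base | SPEC_CTRL_BHI_DIS_S;

		wrmsrl(MSR_IA32_SPEC_CTRL, val);
	}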

For now, branch history is not cleared at interrupt entry, as malicious
applications are not believed to have sufficient control over the
registers, since previous register state is cleared at interrupt
entry. Researchers continue to poke at this area, and it may become
necessary to clear the branch history at interrupt entry as well in the
future.

This mitigation is only defined here. It is enabled later.
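
A minimal sketch of that later enablement step, with names taken from the
upstream follow-up commit and to be treated as assumptions for this tree:

	/*
	 * Hedged sketch, not part of this patch: the follow-up mitigation
	 * selection forces the synthetic feature bit, which arms the
	 * ALTERNATIVE sites added below when alternatives are patched at
	 * boot.
	 */
	static void __init bhi_select_mitigation(void)
	{
		/* preference for the BHI_DIS_S hardware control elided ... */
		setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
	}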

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Co-developed-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(backported from commit bd53ec80f21839cfd4d852a6088279d602d67e5b linux-5.15.y)
[yuxuan.luo:
- entry_64.S:
  - Since bc7b11c04ee9 ("x86/asm/64: Change all ENTRY+END to
    SYM_CODE_*") is not in the tree, substitute
    SYM_FUNC_START/SYM_FUNC_END with ENTRY/END (see the macro sketch
    after this note).
  - Dropped STACK_FRAME_NON_STANDARD and ANNOTATE_INTRA_FUNCTION_CALL
    (not supported by this tree's objtool).

- entry_64_compat.S
- nospec-branch.h
  - Applied with minor adjustments for context.
]
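
For orientation, the older macros that replace the upstream SYM_FUNC_*
annotations expand approximately as follows (simplified from
include/linux/linkage.h as found in pre-5.5 trees; treat the exact
expansion as an assumption):

	/*
	 * Approximate pre-5.5 expansions, simplified. ASM_NL is the
	 * assembler statement separator, ALIGN the architecture's
	 * function-alignment directive.
	 */
	#ifndef ENTRY
	#define ENTRY(name) \
		.globl name ASM_NL \
		ALIGN ASM_NL \
		name:
	#endif

	#ifndef END
	#define END(name) \
		.size name, .-name
	#endif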
CVE-2024-2201
Signed-off-by: Yuxuan Luo <yuxuan.luo@canonical.com>
---
 arch/x86/entry/entry_64.S            | 58 ++++++++++++++++++++++++++++
 arch/x86/entry/entry_64_compat.S     |  3 ++
 arch/x86/include/asm/cpufeatures.h   |  8 ++++
 arch/x86/include/asm/nospec-branch.h | 12 ++++++
 arch/x86/kvm/vmx/vmenter.S           |  2 +
 5 files changed, 83 insertions(+)

Patch

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 640c7d36c26c7..c3806919cca63 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -175,6 +175,7 @@  GLOBAL(entry_SYSCALL_64_after_hwframe)
 
 	/* clobbers %rax, make sure it is after saving the syscall nr */
 	IBRS_ENTER
+	CLEAR_BRANCH_HISTORY
 
 	call	do_syscall_64		/* returns with IRQs disabled */
 
@@ -1768,3 +1769,60 @@  ENTRY(rewind_stack_and_make_dead)
 
 	call	make_task_dead
 END(rewind_stack_and_make_dead)
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent the Loop Stream Detector (LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ *    call 2
+ *      call 2
+ *        call 2
+ *          call 2
+ *            call 2
+ *            ret
+ *          ret
+ *        ret
+ *      ret
+ *    ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone.  Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+ENTRY(clear_bhb_loop)
+	push	%rbp
+	mov	%rsp, %rbp
+	movl	$5, %ecx
+	call	1f
+	jmp	5f
+	.align 64, 0xcc
+1:	call	2f
+	RET
+	.align 64, 0xcc
+2:	movl	$5, %eax
+3:	jmp	4f
+	nop
+4:	sub	$1, %eax
+	jnz	3b
+	sub	$1, %ecx
+	jnz	1b
+	RET
+5:	lfence
+	pop	%rbp
+	RET
+END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
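
As a reading aid, here is a standalone C model of the control flow above.
It is illustration only: the mitigation works by actually executing the
CALL/RET and taken JMP instructions so the BHB is overwritten; the model
merely mirrors the nesting and trip counts:

	/*
	 * Userspace model of clear_bhb_loop's shape (illustration only).
	 * Build: cc -o bhb_model bhb_model.c
	 */
	#include <stdio.h>

	static int modeled_branches;

	static void outer_level(int depth)	/* models "1: call 2f" nesting */
	{
		modeled_branches++;		/* the CALL edge */
		for (int i = 5; i > 0; i--)	/* models the inner %eax loop (3:/4:) */
			modeled_branches += 2;	/* the "jmp 4f"/"jnz 3b" pair */
		if (depth > 1)
			outer_level(depth - 1);	/* "jnz 1b" re-enters label 1 */
		modeled_branches++;		/* the matching RET */
	}

	int main(void)
	{
		outer_level(5);			/* "movl $5, %ecx" */
		printf("modeled branch edges: %d\n", modeled_branches);
		return 0;
	}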
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index c3c4ea4a6711a..fe6e25951d408 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -108,6 +108,7 @@  ENTRY(entry_SYSENTER_compat)
 	cld
 
 	IBRS_ENTER
+	CLEAR_BRANCH_HISTORY
 
 	/*
 	 * SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -257,6 +258,7 @@  GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	TRACE_IRQS_OFF
 
 	IBRS_ENTER
+	CLEAR_BRANCH_HISTORY
 
 	movq	%rsp, %rdi
 	call	do_fast_syscall_32
@@ -417,6 +419,7 @@  ENTRY(entry_INT80_compat)
 	 */
 	TRACE_IRQS_OFF
 	IBRS_ENTER
+	CLEAR_BRANCH_HISTORY
 
 	movq	%rsp, %rdi
 	call	do_int80_syscall_32
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 68fd012e9df6e..0e7df37a74399 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -382,6 +382,14 @@ 
 #define X86_FEATURE_SEV_ES		(19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
 #define X86_FEATURE_SME_COHERENT	(19*32+10) /* "" AMD hardware-enforced cache coherency */
 
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc., and Linux defined features.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_CLEAR_BHB_LOOP	(21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+
 /*
  * BUG word(s)
  */
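
Since the new flag is a synthetic (Linux-defined) bit, nothing in CPUID
ever sets it; the kernel forces it during mitigation selection and then
tests it like any other feature. A minimal sketch with a hypothetical
helper name:

	#include <asm/cpufeature.h>

	/*
	 * Hypothetical helper: true once mitigation selection has forced
	 * the bit, e.g. via setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP).
	 */
	static bool bhb_clear_sequence_in_use(void)
	{
		return boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP);
	}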
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c8819358a332c..d6824fb49080d 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -175,6 +175,14 @@ 
 .Lskip_rsb_\@:
 .endm
 
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#endif
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_RETPOLINE_SAFE					\
@@ -183,6 +191,10 @@ 
 	_ASM_PTR " 999b\n\t"					\
 	".popsection\n\t"
 
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
 #ifdef CONFIG_RETPOLINE
 #ifdef CONFIG_X86_64
 
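At build time CLEAR_BRANCH_HISTORY assembles to padding; at boot, the
alternatives machinery rewrites it into "call clear_bhb_loop" on CPUs where
the feature bit was forced. Its logical effect can be sketched in C
(hypothetical wrapper name; the real mechanism is boot-time code patching,
not a runtime test):

	#include <asm/cpufeature.h>
	#include <asm/nospec-branch.h>

	/* Hedged sketch: logical equivalent of the ALTERNATIVE above. */
	static inline void clear_branch_history(void)	/* hypothetical */
	{
		if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
			clear_bhb_loop();
	}
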
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 2850670c38bb0..8cbebde85a4f6 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -198,6 +198,8 @@  SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 
 	call vmx_spec_ctrl_restore_host
 
+	CLEAR_BRANCH_HISTORY
+
 	/* Put return value in AX */
 	mov %_ASM_BX, %_ASM_AX