Message ID | 20240417235401.243631-6-yuxuan.luo@canonical.com |
---|---|
State | New |
Headers | show |
Series | CVE-2024-2201 | expand |
On 4/17/24 19:53, Yuxuan Luo wrote: > From: Linus Torvalds <torvalds@linux-foundation.org> > > Make <asm/syscall.h> build a switch statement instead, and the compiler can > either decide to generate an indirect jump, or - more likely these days due > to mitigations - just a series of conditional branches. > > Yes, the conditional branches also have branch prediction, but the branch > prediction is much more controlled, in that it just causes speculatively > running the wrong system call (harmless), rather than speculatively running > possibly wrong random less controlled code gadgets. > > This doesn't mitigate other indirect calls, but the system call indirection > is the first and most easily triggered case. > > Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> > Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com> > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> > > (backported from commit 1e3ad78334a69b36e107232e337f9d693dcc9df2) > [yuxuan.luo: > common.c: > - substitute *sys_call_table[nr]() with corresponding *sys_call(regs, > nr). > - For do_syscall_irqs_on()/ia32_sys_call(), substitute both and we’ll > define different ia32_sys_call() in syscall_32.c using macros. > > syscall_x32.c: > - x32_sys_call() should move to syscall_64.c since they both rely on > __x64_ system calls. > > syscall_32.c: > - Focal tree is using __SYSCALL_I386 rather than __SYSCALL, substitute > __SYSCALL with __SYSCALL_I386 in #define lines. > - For the case where CONFIG_IA32_EMULATION=n, expand regs to six > variables. > - Since syscall table in Focal includes arch name in syscalls prefixes > already, change 'case nr: return __x64_##sym()' to '... return > sym()'. This applies to syscall_64.c as well. > > syscall_64.c: > - For x64_sys_call(), substitute __SYSCALL with __SYSCALL_64. > - For x32_sys_call(), wrap it with #ifdef CONFIG_X86_X32_ABI to comply > with x32_sys_call() in common.c. Use macros to ignored __SYSCALL_64 > lines and substitute __SYSCALL_X32 lines. > ] > CVE-2024-2201 > Signed-off-by: Yuxuan Luo <yuxuan.luo@canonical.com> > --- > arch/x86/entry/common.c | 11 ++++------- > arch/x86/entry/syscall_32.c | 33 +++++++++++++++++++++++++++++++++ > arch/x86/entry/syscall_64.c | 27 +++++++++++++++++++++++++++ > arch/x86/include/asm/syscall.h | 4 ++++ > 4 files changed, 68 insertions(+), 7 deletions(-) > > diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c > index 721109c0cf994..dec36a7133821 100644 > --- a/arch/x86/entry/common.c > +++ b/arch/x86/entry/common.c > @@ -288,13 +288,13 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) > > if (likely(nr < NR_syscalls)) { > nr = array_index_nospec(nr, NR_syscalls); > - regs->ax = sys_call_table[nr](regs); > + regs->ax = x64_sys_call(regs, nr); > #ifdef CONFIG_X86_X32_ABI > } else if (likely((nr & __X32_SYSCALL_BIT) && > (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) { > nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT, > X32_NR_syscalls); > - regs->ax = x32_sys_call_table[nr](regs); > + regs->ax = x32_sys_call(regs, nr); > #endif > } > > @@ -331,7 +331,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) > if (likely(nr < IA32_NR_syscalls)) { > nr = array_index_nospec(nr, IA32_NR_syscalls); > #ifdef CONFIG_IA32_EMULATION > - regs->ax = ia32_sys_call_table[nr](regs); > + regs->ax = ia32_sys_call(regs, nr); > #else > /* > * It's possible that a 32-bit syscall implementation > @@ -339,10 +339,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) > * the high bits are zero. Make sure we zero-extend all > * of the args. > */ > - regs->ax = ia32_sys_call_table[nr]( > - (unsigned int)regs->bx, (unsigned int)regs->cx, > - (unsigned int)regs->dx, (unsigned int)regs->si, > - (unsigned int)regs->di, (unsigned int)regs->bp); > + regs->ax = ia32_sys_call(regs, nr); > #endif /* CONFIG_IA32_EMULATION */ > } > > diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c > index 7d17b3addbbb3..e4e186ab20567 100644 > --- a/arch/x86/entry/syscall_32.c > +++ b/arch/x86/entry/syscall_32.c > @@ -30,3 +30,36 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = > [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall, > #include <asm/syscalls_32.h> > }; > +#undef __SYSCALL_I386 > + > +#ifdef CONFIG_IA32_EMULATION > + > +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym(regs); > + > +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) > +{ > + switch (nr) { > + #include <asm/syscalls_32.h> > + default: return __ia32_sys_ni_syscall(regs); > + } > +}; > + > +#else /* CONFIG_IA32_EMULATION */ > + > +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym( \ > + (unsigned int)regs->bx, (unsigned int)regs->cx, \ > + (unsigned int)regs->dx, (unsigned int)regs->si, \ > + (unsigned int)regs->di, (unsigned int)regs->bp); > + > +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) > +{ > + switch (nr) { > + #include <asm/syscalls_32.h> > + default: return __ia32_sys_ni_syscall( > + (unsigned int)regs->bx, (unsigned int)regs->cx, > + (unsigned int)regs->dx, (unsigned int)regs->si, > + (unsigned int)regs->di, (unsigned int)regs->bp); > + } > +}; > + > +#endif /* CONFIG_IA32_EMULATION */ > diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c > index adf619a856e8d..197835442c265 100644 > --- a/arch/x86/entry/syscall_64.c > +++ b/arch/x86/entry/syscall_64.c > @@ -54,3 +54,30 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = { > #undef __SYSCALL_X32 > > #endif > + > +#define __SYSCALL_64(nr, sym, qual) case nr: return sym(regs); > +#define __SYSCALL_X32(nr, sym, qual) > + > +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) > +{ > + switch (nr) { > + #include <asm/syscalls_64.h> > + default: return __x64_sys_ni_syscall(regs); > + } > +}; > + > +#ifdef CONFIG_X86_X32_ABI > + > +#define __SYSCALL_64(nr, sym, qual) > +#define __SYSCALL_X32(nr, sym, qual) case nr: return sym(regs); > +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) > +{ > + switch (nr) { > + #include <asm/syscalls_64.h> > + default: return __x64_sys_ni_syscall(regs); > + } > +} > +#undef __SYSCALL_64 > +#undef __SYSCALL_X32 > + > +#endif /* CONFIG_X86_X32_ABI */ > diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h > index 8db3fdb6102ec..5d131e7206a26 100644 > --- a/arch/x86/include/asm/syscall.h > +++ b/arch/x86/include/asm/syscall.h > @@ -40,6 +40,10 @@ extern const sys_call_ptr_t ia32_sys_call_table[]; > extern const sys_call_ptr_t x32_sys_call_table[]; > #endif > > +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); Maybe we should move this line to the #if defined(CONFIG_IA32_EMULATION) block above. > +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); > +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); > + > /* > * Only the low 32 bits of orig_ax are meaningful, so we return int. > * This importantly ignores the high bits on 64-bit, so comparisons
On 4/19/24 23:28, Yuxuan Luo wrote: > > On 4/17/24 19:53, Yuxuan Luo wrote: >> From: Linus Torvalds <torvalds@linux-foundation.org> >> >> Make <asm/syscall.h> build a switch statement instead, and the >> compiler can >> either decide to generate an indirect jump, or - more likely these >> days due >> to mitigations - just a series of conditional branches. >> >> Yes, the conditional branches also have branch prediction, but the branch >> prediction is much more controlled, in that it just causes speculatively >> running the wrong system call (harmless), rather than speculatively >> running >> possibly wrong random less controlled code gadgets. >> >> This doesn't mitigate other indirect calls, but the system call >> indirection >> is the first and most easily triggered case. >> >> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> >> Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com> >> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> >> Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> >> >> (backported from commit 1e3ad78334a69b36e107232e337f9d693dcc9df2) >> [yuxuan.luo: >> common.c: >> - substitute *sys_call_table[nr]() with corresponding *sys_call(regs, >> nr). >> - For do_syscall_irqs_on()/ia32_sys_call(), substitute both and we’ll >> define different ia32_sys_call() in syscall_32.c using macros. >> >> syscall_x32.c: >> - x32_sys_call() should move to syscall_64.c since they both rely on >> __x64_ system calls. >> >> syscall_32.c: >> - Focal tree is using __SYSCALL_I386 rather than __SYSCALL, substitute >> __SYSCALL with __SYSCALL_I386 in #define lines. >> - For the case where CONFIG_IA32_EMULATION=n, expand regs to six >> variables. >> - Since syscall table in Focal includes arch name in syscalls prefixes >> already, change 'case nr: return __x64_##sym()' to '... return >> sym()'. This applies to syscall_64.c as well. >> >> syscall_64.c: >> - For x64_sys_call(), substitute __SYSCALL with __SYSCALL_64. >> - For x32_sys_call(), wrap it with #ifdef CONFIG_X86_X32_ABI to comply >> with x32_sys_call() in common.c. Use macros to ignored __SYSCALL_64 >> lines and substitute __SYSCALL_X32 lines. >> ] >> CVE-2024-2201 >> Signed-off-by: Yuxuan Luo <yuxuan.luo@canonical.com> >> --- >> arch/x86/entry/common.c | 11 ++++------- >> arch/x86/entry/syscall_32.c | 33 +++++++++++++++++++++++++++++++++ >> arch/x86/entry/syscall_64.c | 27 +++++++++++++++++++++++++++ >> arch/x86/include/asm/syscall.h | 4 ++++ >> 4 files changed, 68 insertions(+), 7 deletions(-) >> >> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c >> index 721109c0cf994..dec36a7133821 100644 >> --- a/arch/x86/entry/common.c >> +++ b/arch/x86/entry/common.c >> @@ -288,13 +288,13 @@ __visible void do_syscall_64(unsigned long nr, >> struct pt_regs *regs) >> if (likely(nr < NR_syscalls)) { >> nr = array_index_nospec(nr, NR_syscalls); >> - regs->ax = sys_call_table[nr](regs); >> + regs->ax = x64_sys_call(regs, nr); >> #ifdef CONFIG_X86_X32_ABI >> } else if (likely((nr & __X32_SYSCALL_BIT) && >> (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) { >> nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT, >> X32_NR_syscalls); >> - regs->ax = x32_sys_call_table[nr](regs); >> + regs->ax = x32_sys_call(regs, nr); >> #endif >> } >> @@ -331,7 +331,7 @@ static __always_inline void >> do_syscall_32_irqs_on(struct pt_regs *regs) >> if (likely(nr < IA32_NR_syscalls)) { >> nr = array_index_nospec(nr, IA32_NR_syscalls); >> #ifdef CONFIG_IA32_EMULATION >> - regs->ax = ia32_sys_call_table[nr](regs); >> + regs->ax = ia32_sys_call(regs, nr); >> #else >> /* >> * It's possible that a 32-bit syscall implementation >> @@ -339,10 +339,7 @@ static __always_inline void >> do_syscall_32_irqs_on(struct pt_regs *regs) >> * the high bits are zero. Make sure we zero-extend all >> * of the args. >> */ >> - regs->ax = ia32_sys_call_table[nr]( >> - (unsigned int)regs->bx, (unsigned int)regs->cx, >> - (unsigned int)regs->dx, (unsigned int)regs->si, >> - (unsigned int)regs->di, (unsigned int)regs->bp); >> + regs->ax = ia32_sys_call(regs, nr); >> #endif /* CONFIG_IA32_EMULATION */ >> } >> diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c >> index 7d17b3addbbb3..e4e186ab20567 100644 >> --- a/arch/x86/entry/syscall_32.c >> +++ b/arch/x86/entry/syscall_32.c >> @@ -30,3 +30,36 @@ __visible const sys_call_ptr_t >> ia32_sys_call_table[__NR_syscall_compat_max+1] = >> [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall, >> #include <asm/syscalls_32.h> >> }; >> +#undef __SYSCALL_I386 >> + >> +#ifdef CONFIG_IA32_EMULATION >> + >> +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym(regs); >> + >> +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) >> +{ >> + switch (nr) { >> + #include <asm/syscalls_32.h> >> + default: return __ia32_sys_ni_syscall(regs); >> + } >> +}; >> + >> +#else /* CONFIG_IA32_EMULATION */ >> + >> +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym( \ >> + (unsigned int)regs->bx, (unsigned int)regs->cx, \ >> + (unsigned int)regs->dx, (unsigned int)regs->si, \ >> + (unsigned int)regs->di, (unsigned int)regs->bp); >> + >> +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) >> +{ >> + switch (nr) { >> + #include <asm/syscalls_32.h> >> + default: return __ia32_sys_ni_syscall( >> + (unsigned int)regs->bx, (unsigned int)regs->cx, >> + (unsigned int)regs->dx, (unsigned int)regs->si, >> + (unsigned int)regs->di, (unsigned int)regs->bp); >> + } >> +}; >> + >> +#endif /* CONFIG_IA32_EMULATION */ >> diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c >> index adf619a856e8d..197835442c265 100644 >> --- a/arch/x86/entry/syscall_64.c >> +++ b/arch/x86/entry/syscall_64.c >> @@ -54,3 +54,30 @@ asmlinkage const sys_call_ptr_t >> x32_sys_call_table[__NR_syscall_x32_max+1] = { >> #undef __SYSCALL_X32 >> #endif >> + >> +#define __SYSCALL_64(nr, sym, qual) case nr: return sym(regs); >> +#define __SYSCALL_X32(nr, sym, qual) >> + >> +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) >> +{ >> + switch (nr) { >> + #include <asm/syscalls_64.h> >> + default: return __x64_sys_ni_syscall(regs); >> + } >> +}; >> + >> +#ifdef CONFIG_X86_X32_ABI >> + >> +#define __SYSCALL_64(nr, sym, qual) >> +#define __SYSCALL_X32(nr, sym, qual) case nr: return sym(regs); >> +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) >> +{ >> + switch (nr) { >> + #include <asm/syscalls_64.h> >> + default: return __x64_sys_ni_syscall(regs); >> + } >> +} >> +#undef __SYSCALL_64 >> +#undef __SYSCALL_X32 >> + >> +#endif /* CONFIG_X86_X32_ABI */ >> diff --git a/arch/x86/include/asm/syscall.h >> b/arch/x86/include/asm/syscall.h >> index 8db3fdb6102ec..5d131e7206a26 100644 >> --- a/arch/x86/include/asm/syscall.h >> +++ b/arch/x86/include/asm/syscall.h >> @@ -40,6 +40,10 @@ extern const sys_call_ptr_t ia32_sys_call_table[]; >> extern const sys_call_ptr_t x32_sys_call_table[]; >> #endif >> +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); > Maybe we should move this line to the #if defined(CONFIG_IA32_EMULATION) > block above. Judging by the self comment here, I guess we're not very confident on the patchset, yet. Did we test this patchset on focal very throughly? Just a compile test for a dramatic change like this is absolutely not enough. >> +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); >> +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); >> + >> /* >> * Only the low 32 bits of orig_ax are meaningful, so we return int. >> * This importantly ignores the high bits on 64-bit, so comparisons >
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 721109c0cf994..dec36a7133821 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -288,13 +288,13 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) if (likely(nr < NR_syscalls)) { nr = array_index_nospec(nr, NR_syscalls); - regs->ax = sys_call_table[nr](regs); + regs->ax = x64_sys_call(regs, nr); #ifdef CONFIG_X86_X32_ABI } else if (likely((nr & __X32_SYSCALL_BIT) && (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) { nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT, X32_NR_syscalls); - regs->ax = x32_sys_call_table[nr](regs); + regs->ax = x32_sys_call(regs, nr); #endif } @@ -331,7 +331,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) if (likely(nr < IA32_NR_syscalls)) { nr = array_index_nospec(nr, IA32_NR_syscalls); #ifdef CONFIG_IA32_EMULATION - regs->ax = ia32_sys_call_table[nr](regs); + regs->ax = ia32_sys_call(regs, nr); #else /* * It's possible that a 32-bit syscall implementation @@ -339,10 +339,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) * the high bits are zero. Make sure we zero-extend all * of the args. */ - regs->ax = ia32_sys_call_table[nr]( - (unsigned int)regs->bx, (unsigned int)regs->cx, - (unsigned int)regs->dx, (unsigned int)regs->si, - (unsigned int)regs->di, (unsigned int)regs->bp); + regs->ax = ia32_sys_call(regs, nr); #endif /* CONFIG_IA32_EMULATION */ } diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 7d17b3addbbb3..e4e186ab20567 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -30,3 +30,36 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall, #include <asm/syscalls_32.h> }; +#undef __SYSCALL_I386 + +#ifdef CONFIG_IA32_EMULATION + +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym(regs); + +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_32.h> + default: return __ia32_sys_ni_syscall(regs); + } +}; + +#else /* CONFIG_IA32_EMULATION */ + +#define __SYSCALL_I386(nr, sym, qual) case nr: return sym( \ + (unsigned int)regs->bx, (unsigned int)regs->cx, \ + (unsigned int)regs->dx, (unsigned int)regs->si, \ + (unsigned int)regs->di, (unsigned int)regs->bp); + +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_32.h> + default: return __ia32_sys_ni_syscall( + (unsigned int)regs->bx, (unsigned int)regs->cx, + (unsigned int)regs->dx, (unsigned int)regs->si, + (unsigned int)regs->di, (unsigned int)regs->bp); + } +}; + +#endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index adf619a856e8d..197835442c265 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -54,3 +54,30 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = { #undef __SYSCALL_X32 #endif + +#define __SYSCALL_64(nr, sym, qual) case nr: return sym(regs); +#define __SYSCALL_X32(nr, sym, qual) + +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_64.h> + default: return __x64_sys_ni_syscall(regs); + } +}; + +#ifdef CONFIG_X86_X32_ABI + +#define __SYSCALL_64(nr, sym, qual) +#define __SYSCALL_X32(nr, sym, qual) case nr: return sym(regs); +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_64.h> + default: return __x64_sys_ni_syscall(regs); + } +} +#undef __SYSCALL_64 +#undef __SYSCALL_X32 + +#endif /* CONFIG_X86_X32_ABI */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 8db3fdb6102ec..5d131e7206a26 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -40,6 +40,10 @@ extern const sys_call_ptr_t ia32_sys_call_table[]; extern const sys_call_ptr_t x32_sys_call_table[]; #endif +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); + /* * Only the low 32 bits of orig_ax are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons