Message ID | 20230920123115.4106721-1-anton.ivanov@cambridgegreys.com |
---|---|
State | Superseded |
Headers | show |
Series | um: Enable preemption in UML | expand |
On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote: > > From: Anton Ivanov anton.ivanov@cambridgegreys.com > > > Preemption requires saving/restoring FPU state. This patch > adds support for it using GCC intrinsics. > > Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com > > --- > arch/um/Kconfig | 1 - > arch/um/Makefile | 3 +- > arch/um/include/asm/fpu/api.h | 4 +- > arch/um/include/asm/processor-generic.h | 1 + > arch/um/kernel/Makefile | 2 +- > arch/um/kernel/fpu.c | 49 +++++++++++++++++++++++++ > 6 files changed, 55 insertions(+), 5 deletions(-) > create mode 100644 arch/um/kernel/fpu.c > > diff --git a/arch/um/Kconfig b/arch/um/Kconfig > index b5e179360534..603f5fd82293 100644 > --- a/arch/um/Kconfig > +++ b/arch/um/Kconfig > @@ -11,7 +11,6 @@ config UML > select ARCH_HAS_KCOV > select ARCH_HAS_STRNCPY_FROM_USER > select ARCH_HAS_STRNLEN_USER > - select ARCH_NO_PREEMPT > select HAVE_ARCH_AUDITSYSCALL > select HAVE_ARCH_KASAN if X86_64 > select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN > diff --git a/arch/um/Makefile b/arch/um/Makefile > index 82f05f250634..35a059baadeb 100644 > --- a/arch/um/Makefile > +++ b/arch/um/Makefile > @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ > $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ > -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ > -Din6addr_loopback=kernel_in6addr_loopback \ > - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr > + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ > + -mxsave Since we're now using a new isa extention, I would add a check in linux_main() with a warning message if xsave is not available. It also wouldn't hurt to only require xsave if CONFIG_PREEMPT is enabled. 
> > KBUILD_RUSTFLAGS += -Crelocation-model=pie > > diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h > index 71bfd9ef3938..0094624ae9b4 100644 > --- a/arch/um/include/asm/fpu/api.h > +++ b/arch/um/include/asm/fpu/api.h > @@ -8,8 +8,8 @@ > * of x86 optimized copy, xor, etc routines into the > * UML code tree. / > > -#define kernel_fpu_begin() (void)0 > -#define kernel_fpu_end() (void)0 > +void kernel_fpu_begin(void); > +void kernel_fpu_end(void); > > static inline bool irq_fpu_usable(void) > { > diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h > index 7414154b8e9a..1c3287f1a444 100644 > --- a/arch/um/include/asm/processor-generic.h > +++ b/arch/um/include/asm/processor-generic.h > @@ -44,6 +44,7 @@ struct thread_struct { > } cb; > } u; > } request; > + u8 fpu[512] __aligned(16); / Intel docs require xsave/xrestore area to be aligned to 16 bytes / > }; > > #define INIT_THREAD \ > diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile > index 811188be954c..5d9fbaa544be 100644 > --- a/arch/um/kernel/Makefile > +++ b/arch/um/kernel/Makefile > @@ -16,7 +16,7 @@ extra-y := vmlinux.lds > > obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ > physmem.o process.o ptrace.o reboot.o sigio.o \ > - signal.o sysrq.o time.o tlb.o trap.o \ > + signal.o sysrq.o time.o tlb.o trap.o fpu.o\ > um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ > obj-y += load_file.o > > diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c > new file mode 100644 > index 000000000000..4142a08c474d > --- /dev/null > +++ b/arch/um/kernel/fpu.c > @@ -0,0 +1,49 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/ > + * Copyright (C) 2023 Cambridge Greys Ltd > + * Copyright (C) 2023 Red Hat Inc > + */ > + > +#include <linux/cpu.h> > > +#include <linux/init.h> > > +#include <asm/fpu/api.h> > > + > +/* > + * The critical section between kernel_fpu_begin() and kernel_fpu_end() > + * is non-reentrant. 
It is the caller's responsibility to avoid reentrance. > + */ > + > +static DEFINE_PER_CPU(bool, in_kernel_fpu); > + > +void kernel_fpu_begin(void) > +{ > + preempt_disable(); > + > + WARN_ON(this_cpu_read(in_kernel_fpu)); > + > + this_cpu_write(in_kernel_fpu, true); > + > +#ifdef CONFIG_64BIT > + __builtin_ia32_fxsave64(¤t->thread.fpu); > > +#else > + __builtin_ia32_fxsave(¤t->thread.fpu); > > +#endif > +} > + > +EXPORT_SYMBOL_GPL(kernel_fpu_begin); > + > +void kernel_fpu_end(void) > +{ > + WARN_ON(!this_cpu_read(in_kernel_fpu)); > + > +#ifdef CONFIG_64BIT > + __builtin_ia32_fxrstor64(¤t->thread.fpu); > > +#else > + __builtin_ia32_fxrstor(¤t->thread.fpu); > > +#endif > + this_cpu_write(in_kernel_fpu, false); > + > + preempt_enable(); > +} > +EXPORT_SYMBOL_GPL(kernel_fpu_end); > + According to https://www.felixcloutier.com/x86/fxsave, the fxsave and fxrstor instructions do not save ymm, zmm, or mask registers. Please make that explicit in your comment, or replace intrinsics with inline assembler to support those registers. Several x86 crypto routines use these larger registers which could overwrite userspace registers. These modules aren't built by default, and may not be available on UML. Even so, this issue is worth a comment, at the least. On my system, lscpu within uml shows all the feature flags that are available on the host system, which includes AVX extentions. Cheers, Peter Lafreniere
On 20/09/2023 15:49, Peter Lafreniere wrote: > On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote: >> >> From: Anton Ivanov anton.ivanov@cambridgegreys.com >> >> >> Preemption requires saving/restoring FPU state. This patch >> adds support for it using GCC intrinsics. >> >> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com >> >> --- >> arch/um/Kconfig | 1 - >> arch/um/Makefile | 3 +- >> arch/um/include/asm/fpu/api.h | 4 +- >> arch/um/include/asm/processor-generic.h | 1 + >> arch/um/kernel/Makefile | 2 +- >> arch/um/kernel/fpu.c | 49 +++++++++++++++++++++++++ >> 6 files changed, 55 insertions(+), 5 deletions(-) >> create mode 100644 arch/um/kernel/fpu.c >> >> diff --git a/arch/um/Kconfig b/arch/um/Kconfig >> index b5e179360534..603f5fd82293 100644 >> --- a/arch/um/Kconfig >> +++ b/arch/um/Kconfig >> @@ -11,7 +11,6 @@ config UML >> select ARCH_HAS_KCOV >> select ARCH_HAS_STRNCPY_FROM_USER >> select ARCH_HAS_STRNLEN_USER >> - select ARCH_NO_PREEMPT >> select HAVE_ARCH_AUDITSYSCALL >> select HAVE_ARCH_KASAN if X86_64 >> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN >> diff --git a/arch/um/Makefile b/arch/um/Makefile >> index 82f05f250634..35a059baadeb 100644 >> --- a/arch/um/Makefile >> +++ b/arch/um/Makefile >> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ >> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ >> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ >> -Din6addr_loopback=kernel_in6addr_loopback \ >> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr >> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ >> + -mxsave > > Since we're now using a new isa extention, I would add a check in > linux_main() with a warning message if xsave is not available. > > It also wouldn't hurt to only require xsave if CONFIG_PREEMPT is enabled. Ack. Will do. The patch will have a few versions. 
I would like to try to sync up with the existing PTRACE fpregs code used for the running the userspace if possible. We should (at least) use the same regs store/restore space. > >> >> KBUILD_RUSTFLAGS += -Crelocation-model=pie >> >> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h >> index 71bfd9ef3938..0094624ae9b4 100644 >> --- a/arch/um/include/asm/fpu/api.h >> +++ b/arch/um/include/asm/fpu/api.h >> @@ -8,8 +8,8 @@ >> * of x86 optimized copy, xor, etc routines into the >> * UML code tree. / >> >> -#define kernel_fpu_begin() (void)0 >> -#define kernel_fpu_end() (void)0 >> +void kernel_fpu_begin(void); >> +void kernel_fpu_end(void); >> >> static inline bool irq_fpu_usable(void) >> { >> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h >> index 7414154b8e9a..1c3287f1a444 100644 >> --- a/arch/um/include/asm/processor-generic.h >> +++ b/arch/um/include/asm/processor-generic.h >> @@ -44,6 +44,7 @@ struct thread_struct { >> } cb; >> } u; >> } request; >> + u8 fpu[512] __aligned(16); / Intel docs require xsave/xrestore area to be aligned to 16 bytes / >> }; >> >> #define INIT_THREAD \ >> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile >> index 811188be954c..5d9fbaa544be 100644 >> --- a/arch/um/kernel/Makefile >> +++ b/arch/um/kernel/Makefile >> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds >> >> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ >> physmem.o process.o ptrace.o reboot.o sigio.o \ >> - signal.o sysrq.o time.o tlb.o trap.o \ >> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\ >> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ >> obj-y += load_file.o >> >> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c >> new file mode 100644 >> index 000000000000..4142a08c474d >> --- /dev/null >> +++ b/arch/um/kernel/fpu.c >> @@ -0,0 +1,49 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/ >> + * Copyright (C) 2023 Cambridge Greys Ltd >> + * Copyright (C) 2023 Red Hat 
Inc >> + */ >> + >> +#include <linux/cpu.h> >> >> +#include <linux/init.h> >> >> +#include <asm/fpu/api.h> >> >> + >> +/* >> + * The critical section between kernel_fpu_begin() and kernel_fpu_end() >> + * is non-reentrant. It is the caller's responsibility to avoid reentrance. >> + */ >> + >> +static DEFINE_PER_CPU(bool, in_kernel_fpu); >> + >> +void kernel_fpu_begin(void) >> +{ >> + preempt_disable(); >> + >> + WARN_ON(this_cpu_read(in_kernel_fpu)); >> + >> + this_cpu_write(in_kernel_fpu, true); >> + >> +#ifdef CONFIG_64BIT >> + __builtin_ia32_fxsave64(¤t->thread.fpu); >> >> +#else >> + __builtin_ia32_fxsave(¤t->thread.fpu); >> >> +#endif >> +} >> + >> +EXPORT_SYMBOL_GPL(kernel_fpu_begin); >> + >> +void kernel_fpu_end(void) >> +{ >> + WARN_ON(!this_cpu_read(in_kernel_fpu)); >> + >> +#ifdef CONFIG_64BIT >> + __builtin_ia32_fxrstor64(¤t->thread.fpu); >> >> +#else >> + __builtin_ia32_fxrstor(¤t->thread.fpu); >> >> +#endif >> + this_cpu_write(in_kernel_fpu, false); >> + >> + preempt_enable(); >> +} >> +EXPORT_SYMBOL_GPL(kernel_fpu_end); >> + > > According to https://www.felixcloutier.com/x86/fxsave, the fxsave and fxrstor > instructions do not save ymm, zmm, or mask registers. Please make that explicit > in your comment, or replace intrinsics with inline assembler to support those > registers. > > Several x86 crypto routines use these larger registers which could overwrite > userspace registers. These modules aren't built by default, and may not be > available on UML. Even so, this issue is worth a comment, at the least. > > On my system, lscpu within uml shows all the feature flags that are available > on the host system, which includes AVX extentions. > > Cheers, > Peter Lafreniere > >
On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote: > > From: Anton Ivanov anton.ivanov@cambridgegreys.com > > > Preemption requires saving/restoring FPU state. This patch > adds support for it using GCC intrinsics. > > Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com > > --- > arch/um/Kconfig | 1 - > arch/um/Makefile | 3 +- > arch/um/include/asm/fpu/api.h | 4 +- > arch/um/include/asm/processor-generic.h | 1 + > arch/um/kernel/Makefile | 2 +- > arch/um/kernel/fpu.c | 49 +++++++++++++++++++++++++ > 6 files changed, 55 insertions(+), 5 deletions(-) > create mode 100644 arch/um/kernel/fpu.c > > diff --git a/arch/um/Kconfig b/arch/um/Kconfig > index b5e179360534..603f5fd82293 100644 > --- a/arch/um/Kconfig > +++ b/arch/um/Kconfig > @@ -11,7 +11,6 @@ config UML > select ARCH_HAS_KCOV > select ARCH_HAS_STRNCPY_FROM_USER > select ARCH_HAS_STRNLEN_USER > - select ARCH_NO_PREEMPT > select HAVE_ARCH_AUDITSYSCALL > select HAVE_ARCH_KASAN if X86_64 > select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN > diff --git a/arch/um/Makefile b/arch/um/Makefile > index 82f05f250634..35a059baadeb 100644 > --- a/arch/um/Makefile > +++ b/arch/um/Makefile > @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ > $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ > -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ > -Din6addr_loopback=kernel_in6addr_loopback \ > - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr > + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ > + -mxsave Since we're now using a new isa extention, I would add a check in linux_main() with a warning message if xsave is not available. It also wouldn't hurt to only require xsave if CONFIG_PREEMPT is enabled. 
> > KBUILD_RUSTFLAGS += -Crelocation-model=pie > > diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h > index 71bfd9ef3938..0094624ae9b4 100644 > --- a/arch/um/include/asm/fpu/api.h > +++ b/arch/um/include/asm/fpu/api.h > @@ -8,8 +8,8 @@ > * of x86 optimized copy, xor, etc routines into the > * UML code tree. / > > -#define kernel_fpu_begin() (void)0 > -#define kernel_fpu_end() (void)0 > +void kernel_fpu_begin(void); > +void kernel_fpu_end(void); > > static inline bool irq_fpu_usable(void) > { > diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h > index 7414154b8e9a..1c3287f1a444 100644 > --- a/arch/um/include/asm/processor-generic.h > +++ b/arch/um/include/asm/processor-generic.h > @@ -44,6 +44,7 @@ struct thread_struct { > } cb; > } u; > } request; > + u8 fpu[512] __aligned(16); / Intel docs require xsave/xrestore area to be aligned to 16 bytes / > }; > > #define INIT_THREAD \ > diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile > index 811188be954c..5d9fbaa544be 100644 > --- a/arch/um/kernel/Makefile > +++ b/arch/um/kernel/Makefile > @@ -16,7 +16,7 @@ extra-y := vmlinux.lds > > obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ > physmem.o process.o ptrace.o reboot.o sigio.o \ > - signal.o sysrq.o time.o tlb.o trap.o \ > + signal.o sysrq.o time.o tlb.o trap.o fpu.o\ > um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ > obj-y += load_file.o > > diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c > new file mode 100644 > index 000000000000..4142a08c474d > --- /dev/null > +++ b/arch/um/kernel/fpu.c > @@ -0,0 +1,49 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/ > + * Copyright (C) 2023 Cambridge Greys Ltd > + * Copyright (C) 2023 Red Hat Inc > + */ > + > +#include <linux/cpu.h> > > +#include <linux/init.h> > > +#include <asm/fpu/api.h> > > + > +/* > + * The critical section between kernel_fpu_begin() and kernel_fpu_end() > + * is non-reentrant. 
It is the caller's responsibility to avoid reentrance. > + */ > + > +static DEFINE_PER_CPU(bool, in_kernel_fpu); > + > +void kernel_fpu_begin(void) > +{ > + preempt_disable(); > + > + WARN_ON(this_cpu_read(in_kernel_fpu)); > + > + this_cpu_write(in_kernel_fpu, true); > + > +#ifdef CONFIG_64BIT > + __builtin_ia32_fxsave64(¤t->thread.fpu); > > +#else > + __builtin_ia32_fxsave(¤t->thread.fpu); > > +#endif > +} > + > +EXPORT_SYMBOL_GPL(kernel_fpu_begin); > + > +void kernel_fpu_end(void) > +{ > + WARN_ON(!this_cpu_read(in_kernel_fpu)); > + > +#ifdef CONFIG_64BIT > + __builtin_ia32_fxrstor64(¤t->thread.fpu); > > +#else > + __builtin_ia32_fxrstor(¤t->thread.fpu); > > +#endif > + this_cpu_write(in_kernel_fpu, false); > + > + preempt_enable(); > +} > +EXPORT_SYMBOL_GPL(kernel_fpu_end); > + According to https://www.felixcloutier.com/x86/fxsave, the fxsave and fxrstor instructions do not save ymm, zmm, or mask registers. Please make that explicit in your comment, or replace intrinsics with inline assembler to support those registers. Several x86 crypto routines use these larger registers which could overwrite userspace registers. These modules aren't built by default, and may not be available on UML. Even so, this issue is worth a comment, at the least. On my system, lscpu within uml shows all the feature flags that are available on the host system, which includes AVX extentions. One more thing: With this patch applied, uml builds with defconfig + debug.config fails, but the same build failure does not happen without this patch. A standard defconfig builds fine in both cases. Cheers, Peter Lafreniere
On 20/09/2023 16:04, Peter Lafreniere wrote: > On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote: >> >> From: Anton Ivanov anton.ivanov@cambridgegreys.com >> >> >> Preemption requires saving/restoring FPU state. This patch >> adds support for it using GCC intrinsics. >> >> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com >> >> --- >> arch/um/Kconfig | 1 - >> arch/um/Makefile | 3 +- >> arch/um/include/asm/fpu/api.h | 4 +- >> arch/um/include/asm/processor-generic.h | 1 + >> arch/um/kernel/Makefile | 2 +- >> arch/um/kernel/fpu.c | 49 +++++++++++++++++++++++++ >> 6 files changed, 55 insertions(+), 5 deletions(-) >> create mode 100644 arch/um/kernel/fpu.c >> >> diff --git a/arch/um/Kconfig b/arch/um/Kconfig >> index b5e179360534..603f5fd82293 100644 >> --- a/arch/um/Kconfig >> +++ b/arch/um/Kconfig >> @@ -11,7 +11,6 @@ config UML >> select ARCH_HAS_KCOV >> select ARCH_HAS_STRNCPY_FROM_USER >> select ARCH_HAS_STRNLEN_USER >> - select ARCH_NO_PREEMPT >> select HAVE_ARCH_AUDITSYSCALL >> select HAVE_ARCH_KASAN if X86_64 >> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN >> diff --git a/arch/um/Makefile b/arch/um/Makefile >> index 82f05f250634..35a059baadeb 100644 >> --- a/arch/um/Makefile >> +++ b/arch/um/Makefile >> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ >> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ >> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ >> -Din6addr_loopback=kernel_in6addr_loopback \ >> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr >> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ >> + -mxsave > > Since we're now using a new isa extention, I would add a check in > linux_main() with a warning message if xsave is not available. > > It also wouldn't hurt to only require xsave if CONFIG_PREEMPT is enabled. 
> >> >> KBUILD_RUSTFLAGS += -Crelocation-model=pie >> >> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h >> index 71bfd9ef3938..0094624ae9b4 100644 >> --- a/arch/um/include/asm/fpu/api.h >> +++ b/arch/um/include/asm/fpu/api.h >> @@ -8,8 +8,8 @@ >> * of x86 optimized copy, xor, etc routines into the >> * UML code tree. / >> >> -#define kernel_fpu_begin() (void)0 >> -#define kernel_fpu_end() (void)0 >> +void kernel_fpu_begin(void); >> +void kernel_fpu_end(void); >> >> static inline bool irq_fpu_usable(void) >> { >> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h >> index 7414154b8e9a..1c3287f1a444 100644 >> --- a/arch/um/include/asm/processor-generic.h >> +++ b/arch/um/include/asm/processor-generic.h >> @@ -44,6 +44,7 @@ struct thread_struct { >> } cb; >> } u; >> } request; >> + u8 fpu[512] __aligned(16); / Intel docs require xsave/xrestore area to be aligned to 16 bytes / >> }; >> >> #define INIT_THREAD \ >> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile >> index 811188be954c..5d9fbaa544be 100644 >> --- a/arch/um/kernel/Makefile >> +++ b/arch/um/kernel/Makefile >> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds >> >> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ >> physmem.o process.o ptrace.o reboot.o sigio.o \ >> - signal.o sysrq.o time.o tlb.o trap.o \ >> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\ >> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ >> obj-y += load_file.o >> >> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c >> new file mode 100644 >> index 000000000000..4142a08c474d >> --- /dev/null >> +++ b/arch/um/kernel/fpu.c >> @@ -0,0 +1,49 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/ >> + * Copyright (C) 2023 Cambridge Greys Ltd >> + * Copyright (C) 2023 Red Hat Inc >> + */ >> + >> +#include <linux/cpu.h> >> >> +#include <linux/init.h> >> >> +#include <asm/fpu/api.h> >> >> + >> +/* >> + * The critical section between kernel_fpu_begin() 
and kernel_fpu_end() >> + * is non-reentrant. It is the caller's responsibility to avoid reentrance. >> + */ >> + >> +static DEFINE_PER_CPU(bool, in_kernel_fpu); >> + >> +void kernel_fpu_begin(void) >> +{ >> + preempt_disable(); >> + >> + WARN_ON(this_cpu_read(in_kernel_fpu)); >> + >> + this_cpu_write(in_kernel_fpu, true); >> + >> +#ifdef CONFIG_64BIT >> + __builtin_ia32_fxsave64(¤t->thread.fpu); >> >> +#else >> + __builtin_ia32_fxsave(¤t->thread.fpu); >> >> +#endif >> +} >> + >> +EXPORT_SYMBOL_GPL(kernel_fpu_begin); >> + >> +void kernel_fpu_end(void) >> +{ >> + WARN_ON(!this_cpu_read(in_kernel_fpu)); >> + >> +#ifdef CONFIG_64BIT >> + __builtin_ia32_fxrstor64(¤t->thread.fpu); >> >> +#else >> + __builtin_ia32_fxrstor(¤t->thread.fpu); >> >> +#endif >> + this_cpu_write(in_kernel_fpu, false); >> + >> + preempt_enable(); >> +} >> +EXPORT_SYMBOL_GPL(kernel_fpu_end); >> + > > According to https://www.felixcloutier.com/x86/fxsave, the fxsave and fxrstor > instructions do not save ymm, zmm, or mask registers. Please make that explicit > in your comment, or replace intrinsics with inline assembler to support those > registers. It may ened up with that :( > > Several x86 crypto routines use these larger registers which could overwrite > userspace registers. These modules aren't built by default, and may not be > available on UML. Even so, this issue is worth a comment, at the least. We did some work to enable some of them. So we may need to save them too. > > On my system, lscpu within uml shows all the feature flags that are available > on the host system, which includes AVX extentions. Yes. The code to reflect features into the guest was added at about the same time as enabling some of the AVX extensions. We could not enable them everywhere, because most of this code in Linux relies on alternatives support and that is not working in UML - it does not have the means to "patch" running code and its "alternatives" are effectively a NOOP. 
They are definitely enabled for RAID6 IIRC, as well as anything else which does a jump table based on CPU features. > > One more thing: > With this patch applied, uml builds with defconfig + debug.config fails, > but the same build failure does not happen without this patch. A standard > defconfig builds fine in both cases. > Ack. Will look at it. > Cheers, > Peter Lafreniere > >
On 20/09/2023 15:49, Peter Lafreniere wrote: > On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote: >> From: Anton Ivanov anton.ivanov@cambridgegreys.com >> >> >> Preemption requires saving/restoring FPU state. This patch >> adds support for it using GCC intrinsics. >> >> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com >> >> --- >> arch/um/Kconfig | 1 - >> arch/um/Makefile | 3 +- >> arch/um/include/asm/fpu/api.h | 4 +- >> arch/um/include/asm/processor-generic.h | 1 + >> arch/um/kernel/Makefile | 2 +- >> arch/um/kernel/fpu.c | 49 +++++++++++++++++++++++++ >> 6 files changed, 55 insertions(+), 5 deletions(-) >> create mode 100644 arch/um/kernel/fpu.c >> >> diff --git a/arch/um/Kconfig b/arch/um/Kconfig >> index b5e179360534..603f5fd82293 100644 >> --- a/arch/um/Kconfig >> +++ b/arch/um/Kconfig >> @@ -11,7 +11,6 @@ config UML >> select ARCH_HAS_KCOV >> select ARCH_HAS_STRNCPY_FROM_USER >> select ARCH_HAS_STRNLEN_USER >> - select ARCH_NO_PREEMPT >> select HAVE_ARCH_AUDITSYSCALL >> select HAVE_ARCH_KASAN if X86_64 >> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN >> diff --git a/arch/um/Makefile b/arch/um/Makefile >> index 82f05f250634..35a059baadeb 100644 >> --- a/arch/um/Makefile >> +++ b/arch/um/Makefile >> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ >> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ >> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ >> -Din6addr_loopback=kernel_in6addr_loopback \ >> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr >> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ >> + -mxsave > Since we're now using a new isa extention, I would add a check in > linux_main() with a warning message if xsave is not available. > > It also wouldn't hurt to only require xsave if CONFIG_PREEMPT is enabled. Indeed. The full area ends up being quite large - half a page or thereabouts. 
> >> KBUILD_RUSTFLAGS += -Crelocation-model=pie >> >> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h >> index 71bfd9ef3938..0094624ae9b4 100644 >> --- a/arch/um/include/asm/fpu/api.h >> +++ b/arch/um/include/asm/fpu/api.h >> @@ -8,8 +8,8 @@ >> * of x86 optimized copy, xor, etc routines into the >> * UML code tree. / >> >> -#define kernel_fpu_begin() (void)0 >> -#define kernel_fpu_end() (void)0 >> +void kernel_fpu_begin(void); >> +void kernel_fpu_end(void); >> >> static inline bool irq_fpu_usable(void) >> { >> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h >> index 7414154b8e9a..1c3287f1a444 100644 >> --- a/arch/um/include/asm/processor-generic.h >> +++ b/arch/um/include/asm/processor-generic.h >> @@ -44,6 +44,7 @@ struct thread_struct { >> } cb; >> } u; >> } request; >> + u8 fpu[512] __aligned(16); / Intel docs require xsave/xrestore area to be aligned to 16 bytes / >> }; >> >> #define INIT_THREAD \ >> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile >> index 811188be954c..5d9fbaa544be 100644 >> --- a/arch/um/kernel/Makefile >> +++ b/arch/um/kernel/Makefile >> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds >> >> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ >> physmem.o process.o ptrace.o reboot.o sigio.o \ >> - signal.o sysrq.o time.o tlb.o trap.o \ >> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\ >> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ >> obj-y += load_file.o >> >> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c >> new file mode 100644 >> index 000000000000..4142a08c474d >> --- /dev/null >> +++ b/arch/um/kernel/fpu.c >> @@ -0,0 +1,49 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/ >> + * Copyright (C) 2023 Cambridge Greys Ltd >> + * Copyright (C) 2023 Red Hat Inc >> + */ >> + >> +#include <linux/cpu.h> >> >> +#include <linux/init.h> >> >> +#include <asm/fpu/api.h> >> >> + >> +/* >> + * The critical section between kernel_fpu_begin() and 
kernel_fpu_end() >> + * is non-reentrant. It is the caller's responsibility to avoid reentrance. >> + */ >> + >> +static DEFINE_PER_CPU(bool, in_kernel_fpu); >> + >> +void kernel_fpu_begin(void) >> +{ >> + preempt_disable(); >> + >> + WARN_ON(this_cpu_read(in_kernel_fpu)); >> + >> + this_cpu_write(in_kernel_fpu, true); >> + >> +#ifdef CONFIG_64BIT >> + __builtin_ia32_fxsave64(¤t->thread.fpu); >> >> +#else >> + __builtin_ia32_fxsave(¤t->thread.fpu); >> >> +#endif >> +} >> + >> +EXPORT_SYMBOL_GPL(kernel_fpu_begin); >> + >> +void kernel_fpu_end(void) >> +{ >> + WARN_ON(!this_cpu_read(in_kernel_fpu)); >> + >> +#ifdef CONFIG_64BIT >> + __builtin_ia32_fxrstor64(¤t->thread.fpu); >> >> +#else >> + __builtin_ia32_fxrstor(¤t->thread.fpu); >> >> +#endif >> + this_cpu_write(in_kernel_fpu, false); >> + >> + preempt_enable(); >> +} >> +EXPORT_SYMBOL_GPL(kernel_fpu_end); >> + > According to https://www.felixcloutier.com/x86/fxsave, the fxsave and fxrstor > instructions do not save ymm, zmm, or mask registers. Please make that explicit > in your comment, or replace intrinsics with inline assembler to support those > registers. > > Several x86 crypto routines use these larger registers which could overwrite > userspace registers. These modules aren't built by default, and may not be > available on UML. Even so, this issue is worth a comment, at the least. > > On my system, lscpu within uml shows all the feature flags that are available > on the host system, which includes AVX extentions. That should be xsave, not fxsave upgrading to xsaveopt if available. > > Cheers, > Peter Lafreniere > >
----- Ursprüngliche Mail ----- > Von: "anton ivanov" <anton.ivanov@cambridgegreys.com> > An: "Peter Lafreniere" <peter@n8pjl.ca> > CC: "Johannes Berg" <johannes@sipsolutions.net>, "linux-um" <linux-um@lists.infradead.org>, "richard" <richard@nod.at> > Gesendet: Donnerstag, 21. September 2023 09:12:33 > Betreff: Re: [PATCH] um: Enable preemption in UML > On 20/09/2023 15:49, Peter Lafreniere wrote: >> On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote: >>> From: Anton Ivanov anton.ivanov@cambridgegreys.com >>> >>> >>> Preemption requires saving/restoring FPU state. This patch >>> adds support for it using GCC intrinsics. >>> >>> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com I'm not so sure whether we should really directly start with CONFIG_PREEMPT. How about implementing CONFIG_PREEMPT_COUNT first? Thanks, //richard
On 21/09/2023 08:30, Richard Weinberger wrote: > ----- Ursprüngliche Mail ----- >> Von: "anton ivanov" <anton.ivanov@cambridgegreys.com> >> An: "Peter Lafreniere" <peter@n8pjl.ca> >> CC: "Johannes Berg" <johannes@sipsolutions.net>, "linux-um" <linux-um@lists.infradead.org>, "richard" <richard@nod.at> >> Gesendet: Donnerstag, 21. September 2023 09:12:33 >> Betreff: Re: [PATCH] um: Enable preemption in UML >> On 20/09/2023 15:49, Peter Lafreniere wrote: >>> On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote: >>>> From: Anton Ivanov anton.ivanov@cambridgegreys.com >>>> >>>> >>>> Preemption requires saving/restoring FPU state. This patch >>>> adds support for it using GCC intrinsics. >>>> >>>> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com > I'm not so sure whether we should really directly start with CONFIG_PREEMPT. I have managed to make it work except the IRQ return bit suggested by Peter. We are quite paranoid regarding locking and we are UP so nearly all of it worked out of the box. > How about implementing CONFIG_PREEMPT_COUNT first? Sure. Will try that today. > > Thanks, > //richard >
diff --git a/arch/um/Kconfig b/arch/um/Kconfig index b5e179360534..603f5fd82293 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -11,7 +11,6 @@ config UML select ARCH_HAS_KCOV select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER - select ARCH_NO_PREEMPT select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN diff --git a/arch/um/Makefile b/arch/um/Makefile index 82f05f250634..35a059baadeb 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ -Din6addr_loopback=kernel_in6addr_loopback \ - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ + -mxsave KBUILD_RUSTFLAGS += -Crelocation-model=pie diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h index 71bfd9ef3938..0094624ae9b4 100644 --- a/arch/um/include/asm/fpu/api.h +++ b/arch/um/include/asm/fpu/api.h @@ -8,8 +8,8 @@ * of x86 optimized copy, xor, etc routines into the * UML code tree. 
*/ -#define kernel_fpu_begin() (void)0 -#define kernel_fpu_end() (void)0 +void kernel_fpu_begin(void); +void kernel_fpu_end(void); static inline bool irq_fpu_usable(void) { diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 7414154b8e9a..1c3287f1a444 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -44,6 +44,7 @@ struct thread_struct { } cb; } u; } request; + u8 fpu[512] __aligned(16); /* Intel docs require xsave/xrestore area to be aligned to 16 bytes */ }; #define INIT_THREAD \ diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 811188be954c..5d9fbaa544be 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -16,7 +16,7 @@ extra-y := vmlinux.lds obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ physmem.o process.o ptrace.o reboot.o sigio.o \ - signal.o sysrq.o time.o tlb.o trap.o \ + signal.o sysrq.o time.o tlb.o trap.o fpu.o\ um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ obj-y += load_file.o diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c new file mode 100644 index 000000000000..4142a08c474d --- /dev/null +++ b/arch/um/kernel/fpu.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Cambridge Greys Ltd + * Copyright (C) 2023 Red Hat Inc + */ + +#include <linux/cpu.h> +#include <linux/init.h> +#include <asm/fpu/api.h> + +/* + * The critical section between kernel_fpu_begin() and kernel_fpu_end() + * is non-reentrant. It is the caller's responsibility to avoid reentrance. 
+ */ + +static DEFINE_PER_CPU(bool, in_kernel_fpu); + +void kernel_fpu_begin(void) +{ + preempt_disable(); + + WARN_ON(this_cpu_read(in_kernel_fpu)); + + this_cpu_write(in_kernel_fpu, true); + +#ifdef CONFIG_64BIT + __builtin_ia32_fxsave64(¤t->thread.fpu); +#else + __builtin_ia32_fxsave(¤t->thread.fpu); +#endif +} + +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + WARN_ON(!this_cpu_read(in_kernel_fpu)); + +#ifdef CONFIG_64BIT + __builtin_ia32_fxrstor64(¤t->thread.fpu); +#else + __builtin_ia32_fxrstor(¤t->thread.fpu); +#endif + this_cpu_write(in_kernel_fpu, false); + + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); +