Message ID | 1105090647.48374193.1700351103830.JavaMail.zimbra@raptorengineeringinc.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | powerpc: Don't clobber fr0/vs0 during fp|altivec register save | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/github-powerpc_ppctests | success | Successfully ran 8 jobs. |
snowpatch_ozlabs/github-powerpc_selftests | success | Successfully ran 8 jobs. |
snowpatch_ozlabs/github-powerpc_sparse | success | Successfully ran 4 jobs. |
snowpatch_ozlabs/github-powerpc_clang | success | Successfully ran 6 jobs. |
snowpatch_ozlabs/github-powerpc_kernel_qemu | success | Successfully ran 23 jobs. |
----- Original Message ----- > From: "Timothy Pearson" <tpearson@raptorengineering.com> > To: "Jens Axboe" <axboe@kernel.dk>, "regressions" <regressions@lists.linux.dev>, "Michael Ellerman" > <mpe@ellerman.id.au>, "npiggin" <npiggin@gmail.com>, "christophe leroy" <christophe.leroy@csgroup.eu>, "linuxppc-dev" > <linuxppc-dev@lists.ozlabs.org> > Sent: Saturday, November 18, 2023 5:45:03 PM > Subject: [PATCH] powerpc: Don't clobber fr0/vs0 during fp|altivec register save > During floating point and vector save to thread data fr0/vs0 are clobbered > by the FPSCR/VSCR store routine. This leads to userspace register corruption > and application data corruption / crash under the following rare condition: > > * A userspace thread is executing with VSX/FP mode enabled > * The userspace thread is making active use of fr0 and/or vs0 > * An IPI is taken in kernel mode, forcing the userspace thread to reschedule > * The userspace thread is interrupted by the IPI before accessing data it > previously stored in fr0/vs0 > * The thread being switched in by the IPI has a pending signal > > If these exact criteria are met, then the following sequence happens: > > * The existing thread FP storage is still valid before the IPI, due to a > prior call to save_fpu() or store_fp_state(). Note that the current > fr0/vs0 registers have been clobbered, so the FP/VSX state in registers > is now invalid pending a call to restore_fp()/restore_altivec(). > * IPI -- FP/VSX register state remains invalid > * interrupt_exit_user_prepare_main() calls do_notify_resume(), > due to the pending signal > * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which > merrily reads and saves the invalid FP/VSX state to thread local storage. > * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid > FP/VSX state back to registers. > * Execution is released to userspace, and the application crashes or corrupts > data. 
> > Without the pending signal, do_notify_resume() is never called, therefore the > invalid register state doesn't matter as it is overwritten nearly immediately > by interrupt_exit_user_prepare_main() calling restore_math() before return > to userspace. > > The combination of MariaDB and io_uring is especially good at triggering data > corruption using the above sequence, see MariaDB bug MDEV-30728. > > Restore fr0/vs0 after FPSCR/VSCR store has completed for both the fp and > altivec register save paths. > > Tested under QEMU in kvm mode, running on a Talos II workstation with dual > POWER9 DD2.2 CPUs. > > Tested-by: Timothy Pearson <tpearson@raptorengineering.com> > Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> > --- > arch/powerpc/kernel/fpu.S | 13 +++++++++++++ > arch/powerpc/kernel/vector.S | 4 ++++ > 2 files changed, 17 insertions(+) > > diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S > index 6a9acfb690c9..2f8f3f93cbb6 100644 > --- a/arch/powerpc/kernel/fpu.S > +++ b/arch/powerpc/kernel/fpu.S > @@ -23,6 +23,15 @@ > #include <asm/feature-fixups.h> > > #ifdef CONFIG_VSX > +#define __REST_1FPVSR(n,c,base) \ > +BEGIN_FTR_SECTION \ > + b 2f; \ > +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ > + REST_FPR(n,base); \ > + b 3f; \ > +2: REST_VSR(n,c,base); \ > +3: > + > #define __REST_32FPVSRS(n,c,base) \ > BEGIN_FTR_SECTION \ > b 2f; \ > @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ > 2: SAVE_32VSRS(n,c,base); \ > 3: > #else > +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) > #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) > #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) > #endif > +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) > #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) > #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) > > @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) > SAVE_32FPVSRS(0, R4, R3) > mffs fr0 > stfd 
fr0,FPSTATE_FPSCR(r3) > + REST_1FPVSR(0, R4, R3) > blr > EXPORT_SYMBOL(store_fp_state) > > @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) > 2: SAVE_32FPVSRS(0, R4, R6) > mffs fr0 > stfd fr0,FPSTATE_FPSCR(r6) > + REST_1FPVSR(0, R4, R6) > blr > diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S > index 4094e4c4c77a..8c63b05b421e 100644 > --- a/arch/powerpc/kernel/vector.S > +++ b/arch/powerpc/kernel/vector.S > @@ -33,6 +33,8 @@ _GLOBAL(store_vr_state) > mfvscr v0 > li r4, VRSTATE_VSCR > stvx v0, r4, r3 > + li r4, 0 > + lvx v0, r4, r3 > blr > EXPORT_SYMBOL(store_vr_state) > > @@ -109,6 +111,8 @@ _GLOBAL(save_altivec) > mfvscr v0 > li r4,VRSTATE_VSCR > stvx v0,r4,r7 > + li r4,0 > + lvx v0,r4,r7 > blr > > #ifdef CONFIG_VSX > -- > 2.39.2 Once this (or an equivalent) is merged upstream, I'll take the stable backport task. Thanks!
On 19.11.23 00:45, Timothy Pearson wrote: > During floating point and vector save to thread data fr0/vs0 are clobbered > by the FPSCR/VSCR store routine. This leads to userspace register corruption > and application data corruption / crash under the following rare condition: > [...] > Tested-by: Timothy Pearson <tpearson@raptorengineering.com> Many thx for this, good to see you finally found the problem. FWIW, you might want to add a Closes: https://lore.kernel.org/all/480932026.45576726.1699374859845.JavaMail.zimbra@raptorengineeringinc.com/ here. Yes, I care about those tags because of regression tracking. But it only relies on Link:/Closes: tags because they were meant to be used in the first place to link to backstories and details of a change[1]. And you and Jens did such good debugging in that thread, which is why it's IMHO really worth linking here in case anyone ever needs to look into the backstory later. > Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> > [..] Thx again for all your work you put into this. Ciao, Thorsten [1] see Documentation/process/submitting-patches.rst (http://docs.kernel.org/process/submitting-patches.html) and Documentation/process/5.Posting.rst (https://docs.kernel.org/process/5.Posting.html) See also these mails from Linus: https://lore.kernel.org/all/CAHk-=wjMmSZzMJ3Xnskdg4+GGz=5p5p+GSYyFBTh0f-DgvdBWg@mail.gmail.com/ https://lore.kernel.org/all/CAHk-=wgs38ZrfPvy=nOwVkVzjpM3VFU1zobP37Fwd_h9iAD5JQ@mail.gmail.com/ https://lore.kernel.org/all/CAHk-=wjxzafG-=J8oT30s7upn4RhBs6TX-uVFZ5rME+L5_DoJA@mail.gmail.com/
On Sat, Nov 18, 2023 at 05:45:03PM -0600, Timothy Pearson wrote: > During floating point and vector save to thread data fr0/vs0 are clobbered > by the FPSCR/VSCR store routine. This leads to userspace register corruption > and application data corruption / crash under the following rare condition: > > * A userspace thread is executing with VSX/FP mode enabled > * The userspace thread is making active use of fr0 and/or vs0 > * An IPI is taken in kernel mode, forcing the userspace thread to reschedule > * The userspace thread is interrupted by the IPI before accessing data it > previously stored in fr0/vs0 > * The thread being switched in by the IPI has a pending signal > > If these exact criteria are met, then the following sequence happens: > > * The existing thread FP storage is still valid before the IPI, due to a > prior call to save_fpu() or store_fp_state(). Note that the current > fr0/vs0 registers have been clobbered, so the FP/VSX state in registers > is now invalid pending a call to restore_fp()/restore_altivec(). > * IPI -- FP/VSX register state remains invalid > * interrupt_exit_user_prepare_main() calls do_notify_resume(), > due to the pending signal > * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which > merrily reads and saves the invalid FP/VSX state to thread local storage. > * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid > FP/VSX state back to registers. > * Execution is released to userspace, and the application crashes or corrupts > data. > > Without the pending signal, do_notify_resume() is never called, therefore the > invalid register state doesn't matter as it is overwritten nearly immediately > by interrupt_exit_user_prepare_main() calling restore_math() before return > to userspace. > > The combination of MariaDB and io_uring is especially good at triggering data > corruption using the above sequence, see MariaDB bug MDEV-30728. 
> > Restore fr0/vs0 after FPSCR/VSCR store has completed for both the fp and > altivec register save paths. > > Tested under QEMU in kvm mode, running on a Talos II workstation with dual > POWER9 DD2.2 CPUs. > > Tested-by: Timothy Pearson <tpearson@raptorengineering.com> > Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> > --- > arch/powerpc/kernel/fpu.S | 13 +++++++++++++ > arch/powerpc/kernel/vector.S | 4 ++++ > 2 files changed, 17 insertions(+) > > diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S > index 6a9acfb690c9..2f8f3f93cbb6 100644 > --- a/arch/powerpc/kernel/fpu.S > +++ b/arch/powerpc/kernel/fpu.S > @@ -23,6 +23,15 @@ > #include <asm/feature-fixups.h> > > #ifdef CONFIG_VSX > +#define __REST_1FPVSR(n,c,base) \ > +BEGIN_FTR_SECTION \ > + b 2f; \ > +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ > + REST_FPR(n,base); \ > + b 3f; \ > +2: REST_VSR(n,c,base); \ > +3: > + > #define __REST_32FPVSRS(n,c,base) \ > BEGIN_FTR_SECTION \ > b 2f; \ > @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ > 2: SAVE_32VSRS(n,c,base); \ > 3: > #else > +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) > #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) > #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) > #endif > +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) > #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) > #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) > > @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) > SAVE_32FPVSRS(0, R4, R3) > mffs fr0 > stfd fr0,FPSTATE_FPSCR(r3) > + REST_1FPVSR(0, R4, R3) > blr > EXPORT_SYMBOL(store_fp_state) > > @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) > 2: SAVE_32FPVSRS(0, R4, R6) > mffs fr0 > stfd fr0,FPSTATE_FPSCR(r6) > + REST_1FPVSR(0, R4, R6) > blr > diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S > index 4094e4c4c77a..8c63b05b421e 100644 > --- a/arch/powerpc/kernel/vector.S > +++ b/arch/powerpc/kernel/vector.S 
> @@ -33,6 +33,8 @@ _GLOBAL(store_vr_state) > mfvscr v0 > li r4, VRSTATE_VSCR > stvx v0, r4, r3 > + li r4, 0 > + lvx v0, r4, r3 Just a small nit, no need for clearing r4, "lvx v0,0,r3" will do, as with all Power instructions using indexed addressing mode. > blr > EXPORT_SYMBOL(store_vr_state) > > @@ -109,6 +111,8 @@ _GLOBAL(save_altivec) > mfvscr v0 > li r4,VRSTATE_VSCR > stvx v0,r4,r7 > + li r4,0 > + lvx v0,r4,r7 Ditto. And great bug hunt, by the way... > blr > > #ifdef CONFIG_VSX > -- > 2.39.2 Cheers, Gabriel
On 11/18/23 4:45 PM, Timothy Pearson wrote: > During floating point and vector save to thread data fr0/vs0 are clobbered > by the FPSCR/VSCR store routine. This leads to userspace register corruption > and application data corruption / crash under the following rare condition: > > * A userspace thread is executing with VSX/FP mode enabled > * The userspace thread is making active use of fr0 and/or vs0 > * An IPI is taken in kernel mode, forcing the userspace thread to reschedule > * The userspace thread is interrupted by the IPI before accessing data it > previously stored in fr0/vs0 > * The thread being switched in by the IPI has a pending signal > > If these exact criteria are met, then the following sequence happens: > > * The existing thread FP storage is still valid before the IPI, due to a > prior call to save_fpu() or store_fp_state(). Note that the current > fr0/vs0 registers have been clobbered, so the FP/VSX state in registers > is now invalid pending a call to restore_fp()/restore_altivec(). > * IPI -- FP/VSX register state remains invalid > * interrupt_exit_user_prepare_main() calls do_notify_resume(), > due to the pending signal > * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which > merrily reads and saves the invalid FP/VSX state to thread local storage. > * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid > FP/VSX state back to registers. > * Execution is released to userspace, and the application crashes or corrupts > data. What an epic bug hunt! Hats off to you for seeing it through and getting to the bottom of it. Particularly difficult as the commit that made it easier to trigger was in no way related to where the actual bug was. I ran this on the vm I have access to, and it survived 2x500 iterations. Happy to call that good: Tested-by: Jens Axboe <axboe@kernel.dk>
Hi, On Sun, Nov 19, 2023 at 06:14:50AM -0700, Jens Axboe wrote: > On 11/18/23 4:45 PM, Timothy Pearson wrote: > > During floating point and vector save to thread data fr0/vs0 are clobbered > > by the FPSCR/VSCR store routine. This leads to userspace register corruption > > and application data corruption / crash under the following rare condition: > > > > * A userspace thread is executing with VSX/FP mode enabled > > * The userspace thread is making active use of fr0 and/or vs0 > > * An IPI is taken in kernel mode, forcing the userspace thread to reschedule > > * The userspace thread is interrupted by the IPI before accessing data it > > previously stored in fr0/vs0 > > * The thread being switched in by the IPI has a pending signal > > > > If these exact criteria are met, then the following sequence happens: > > > > * The existing thread FP storage is still valid before the IPI, due to a > > prior call to save_fpu() or store_fp_state(). Note that the current > > fr0/vs0 registers have been clobbered, so the FP/VSX state in registers > > is now invalid pending a call to restore_fp()/restore_altivec(). > > * IPI -- FP/VSX register state remains invalid > > * interrupt_exit_user_prepare_main() calls do_notify_resume(), > > due to the pending signal > > * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which > > merrily reads and saves the invalid FP/VSX state to thread local storage. > > * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid > > FP/VSX state back to registers. > > * Execution is released to userspace, and the application crashes or corrupts > > data. > > What an epic bug hunt! Hats off to you for seeing it through and getting > to the bottom of it. Particularly difficult as the commit that made it > easier to trigger was in no way related to where the actual bug was. > > I ran this on the vm I have access to, and it survived 2x500 iterations. 
> Happy to call that good: > > Tested-by: Jens Axboe <axboe@kernel.dk> Thanks to all involved! Is this going to land soon in mainline so it can be picked as well for the affected stable trees? Regards, Salvatore
Le 29/11/2023 à 11:30, Salvatore Bonaccorso a écrit : > Hi, > > On Sun, Nov 19, 2023 at 06:14:50AM -0700, Jens Axboe wrote: >> On 11/18/23 4:45 PM, Timothy Pearson wrote: >>> During floating point and vector save to thread data fr0/vs0 are clobbered >>> by the FPSCR/VSCR store routine. This leads to userspace register corruption >>> and application data corruption / crash under the following rare condition: >>> >>> * A userspace thread is executing with VSX/FP mode enabled >>> * The userspace thread is making active use of fr0 and/or vs0 >>> * An IPI is taken in kernel mode, forcing the userspace thread to reschedule >>> * The userspace thread is interrupted by the IPI before accessing data it >>> previously stored in fr0/vs0 >>> * The thread being switched in by the IPI has a pending signal >>> >>> If these exact criteria are met, then the following sequence happens: >>> >>> * The existing thread FP storage is still valid before the IPI, due to a >>> prior call to save_fpu() or store_fp_state(). Note that the current >>> fr0/vs0 registers have been clobbered, so the FP/VSX state in registers >>> is now invalid pending a call to restore_fp()/restore_altivec(). >>> * IPI -- FP/VSX register state remains invalid >>> * interrupt_exit_user_prepare_main() calls do_notify_resume(), >>> due to the pending signal >>> * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which >>> merrily reads and saves the invalid FP/VSX state to thread local storage. >>> * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid >>> FP/VSX state back to registers. >>> * Execution is released to userspace, and the application crashes or corrupts >>> data. >> >> What an epic bug hunt! Hats off to you for seeing it through and getting >> to the bottom of it. Particularly difficult as the commit that made it >> easier to trigger was in no way related to where the actual bug was. >> >> I ran this on the vm I have access to, and it survived 2x500 iterations. 
>> Happy to call that good: >> >> Tested-by: Jens Axboe <axboe@kernel.dk> > > Thanks to all involved! > > Is this going to land soon in mainline so it can be picked as well for > the affected stable trees? > This version of the patch has been superseded. As said by Michael in the relevant thread, the plan is to have version 2 of this patch in 6.7-rc4, see https://patchwork.ozlabs.org/project/linuxppc-dev/patch/1921539696.48534988.1700407082933.JavaMail.zimbra@raptorengineeringinc.com/ Christophe
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 6a9acfb690c9..2f8f3f93cbb6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -23,6 +23,15 @@ #include <asm/feature-fixups.h> #ifdef CONFIG_VSX +#define __REST_1FPVSR(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_FPR(n,base); \ + b 3f; \ +2: REST_VSR(n,c,base); \ +3: + #define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) SAVE_32FPVSRS(0, R4, R3) mffs fr0 stfd fr0,FPSTATE_FPSCR(r3) + REST_1FPVSR(0, R4, R3) blr EXPORT_SYMBOL(store_fp_state) @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) 2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) + REST_1FPVSR(0, R4, R6) blr diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 4094e4c4c77a..8c63b05b421e 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -33,6 +33,8 @@ _GLOBAL(store_vr_state) mfvscr v0 li r4, VRSTATE_VSCR stvx v0, r4, r3 + li r4, 0 + lvx v0, r4, r3 blr EXPORT_SYMBOL(store_vr_state) @@ -109,6 +111,8 @@ _GLOBAL(save_altivec) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 + li r4,0 + lvx v0,r4,r7 blr #ifdef CONFIG_VSX