diff mbox series

powerpc: Don't clobber fr0/vs0 during fp|altivec register save

Message ID 1105090647.48374193.1700351103830.JavaMail.zimbra@raptorengineeringinc.com (mailing list archive)
State Superseded
Headers show
Series powerpc: Don't clobber fr0/vs0 during fp|altivec register save | expand

Checks

Context Check Description
snowpatch_ozlabs/github-powerpc_ppctests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_selftests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_sparse success Successfully ran 4 jobs.
snowpatch_ozlabs/github-powerpc_clang success Successfully ran 6 jobs.
snowpatch_ozlabs/github-powerpc_kernel_qemu success Successfully ran 23 jobs.

Commit Message

Timothy Pearson Nov. 18, 2023, 11:45 p.m. UTC
During floating point and vector save to thread data fr0/vs0 are clobbered
by the FPSCR/VSCR store routine.  This leads to userspace register corruption
and application data corruption / crash under the following rare condition:

 * A userspace thread is executing with VSX/FP mode enabled
 * The userspace thread is making active use of fr0 and/or vs0
 * An IPI is taken in kernel mode, forcing the userspace thread to reschedule
 * The userspace thread is interrupted by the IPI before accessing data it
   previously stored in fr0/vs0
 * The thread being switched in by the IPI has a pending signal

If these exact criteria are met, then the following sequence happens:

 * The existing thread FP storage is still valid before the IPI, due to a
   prior call to save_fpu() or store_fp_state().  Note that the current
   fr0/vs0 registers have been clobbered, so the FP/VSX state in registers
   is now invalid pending a call to restore_fp()/restore_altivec().
 * IPI -- FP/VSX register state remains invalid
 * interrupt_exit_user_prepare_main() calls do_notify_resume(),
   due to the pending signal
 * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which
   merrily reads and saves the invalid FP/VSX state to thread local storage.
 * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid
   FP/VSX state back to registers.
 * Execution is released to userspace, and the application crashes or corrupts
   data.

Without the pending signal, do_notify_resume() is never called, therefore the
invalid register state doesn't matter as it is overwritten nearly immediately
by interrupt_exit_user_prepare_main() calling restore_math() before return
to userspace.

The combination of MariaDB and io_uring is especially good at triggering data
corruption using the above sequence, see MariaDB bug MDEV-30728.

Restore fr0/vs0 after FPSCR/VSCR store has completed for both the fp and
altivec register save paths.

Tested under QEMU in kvm mode, running on a Talos II workstation with dual
POWER9 DD2.2 CPUs.

Tested-by: Timothy Pearson <tpearson@raptorengineering.com>
Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com>
---
 arch/powerpc/kernel/fpu.S    | 13 +++++++++++++
 arch/powerpc/kernel/vector.S |  4 ++++
 2 files changed, 17 insertions(+)

Comments

Timothy Pearson Nov. 19, 2023, 12:12 a.m. UTC | #1
----- Original Message -----
> From: "Timothy Pearson" <tpearson@raptorengineering.com>
> To: "Jens Axboe" <axboe@kernel.dk>, "regressions" <regressions@lists.linux.dev>, "Michael Ellerman"
> <mpe@ellerman.id.au>, "npiggin" <npiggin@gmail.com>, "christophe leroy" <christophe.leroy@csgroup.eu>, "linuxppc-dev"
> <linuxppc-dev@lists.ozlabs.org>
> Sent: Saturday, November 18, 2023 5:45:03 PM
> Subject: [PATCH] powerpc: Don't clobber fr0/vs0 during fp|altivec register  save

> During floating point and vector save to thread data fr0/vs0 are clobbered
> by the FPSCR/VSCR store routine.  This leads to userspace register corruption
> and application data corruption / crash under the following rare condition:
> 
> * A userspace thread is executing with VSX/FP mode enabled
> * The userspace thread is making active use of fr0 and/or vs0
> * An IPI is taken in kernel mode, forcing the userspace thread to reschedule
> * The userspace thread is interrupted by the IPI before accessing data it
>   previously stored in fr0/vs0
> * The thread being switched in by the IPI has a pending signal
> 
> If these exact criteria are met, then the following sequence happens:
> 
> * The existing thread FP storage is still valid before the IPI, due to a
>   prior call to save_fpu() or store_fp_state().  Note that the current
>   fr0/vs0 registers have been clobbered, so the FP/VSX state in registers
>   is now invalid pending a call to restore_fp()/restore_altivec().
> * IPI -- FP/VSX register state remains invalid
> * interrupt_exit_user_prepare_main() calls do_notify_resume(),
>   due to the pending signal
> * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which
>   merrily reads and saves the invalid FP/VSX state to thread local storage.
> * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid
>   FP/VSX state back to registers.
> * Execution is released to userspace, and the application crashes or corrupts
>   data.
> 
> Without the pending signal, do_notify_resume() is never called, therefore the
> invalid register state doesn't matter as it is overwritten nearly immediately
> by interrupt_exit_user_prepare_main() calling restore_math() before return
> to userspace.
> 
> The combination of MariaDB and io_uring is especially good at triggering data
> corruption using the above sequence, see MariaDB bug MDEV-30728.
> 
> Restore fr0/vs0 after FPSCR/VSCR store has completed for both the fp and
> altivec register save paths.
> 
> Tested under QEMU in kvm mode, running on a Talos II workstation with dual
> POWER9 DD2.2 CPUs.
> 
> Tested-by: Timothy Pearson <tpearson@raptorengineering.com>
> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com>
> ---
> arch/powerpc/kernel/fpu.S    | 13 +++++++++++++
> arch/powerpc/kernel/vector.S |  4 ++++
> 2 files changed, 17 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
> index 6a9acfb690c9..2f8f3f93cbb6 100644
> --- a/arch/powerpc/kernel/fpu.S
> +++ b/arch/powerpc/kernel/fpu.S
> @@ -23,6 +23,15 @@
> #include <asm/feature-fixups.h>
> 
> #ifdef CONFIG_VSX
> +#define __REST_1FPVSR(n,c,base)						\
> +BEGIN_FTR_SECTION							\
> +	b	2f;							\
> +END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
> +	REST_FPR(n,base);						\
> +	b	3f;							\
> +2:	REST_VSR(n,c,base);						\
> +3:
> +
> #define __REST_32FPVSRS(n,c,base)					\
> BEGIN_FTR_SECTION							\
> 	b	2f;							\
> @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
> 2:	SAVE_32VSRS(n,c,base);						\
> 3:
> #else
> +#define __REST_1FPVSR(n,b,base)		REST_FPR(n, base)
> #define __REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base)
> #define __SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)
> #endif
> +#define REST_1FPVSR(n,c,base)   __REST_1FPVSR(n,__REG_##c,__REG_##base)
> #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
> #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
> 
> @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state)
> 	SAVE_32FPVSRS(0, R4, R3)
> 	mffs	fr0
> 	stfd	fr0,FPSTATE_FPSCR(r3)
> +	REST_1FPVSR(0, R4, R3)
> 	blr
> EXPORT_SYMBOL(store_fp_state)
> 
> @@ -138,4 +150,5 @@ _GLOBAL(save_fpu)
> 2:	SAVE_32FPVSRS(0, R4, R6)
> 	mffs	fr0
> 	stfd	fr0,FPSTATE_FPSCR(r6)
> +	REST_1FPVSR(0, R4, R6)
> 	blr
> diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
> index 4094e4c4c77a..8c63b05b421e 100644
> --- a/arch/powerpc/kernel/vector.S
> +++ b/arch/powerpc/kernel/vector.S
> @@ -33,6 +33,8 @@ _GLOBAL(store_vr_state)
> 	mfvscr	v0
> 	li	r4, VRSTATE_VSCR
> 	stvx	v0, r4, r3
> +	li	r4, 0
> +	lvx	v0, r4, r3
> 	blr
> EXPORT_SYMBOL(store_vr_state)
> 
> @@ -109,6 +111,8 @@ _GLOBAL(save_altivec)
> 	mfvscr	v0
> 	li	r4,VRSTATE_VSCR
> 	stvx	v0,r4,r7
> +	li	r4,0
> +	lvx	v0,r4,r7
> 	blr
> 
> #ifdef CONFIG_VSX
> --
> 2.39.2

Once this (or an equivalent) is merged upstream, I'll take the stable backport task.

Thanks!
Linux regression tracking (Thorsten Leemhuis) Nov. 19, 2023, 6:08 a.m. UTC | #2
On 19.11.23 00:45, Timothy Pearson wrote:
> During floating point and vector save to thread data fr0/vs0 are clobbered
> by the FPSCR/VSCR store routine.  This leads to userspace register corruption
> and application data corruption / crash under the following rare condition:
> [...]
> Tested-by: Timothy Pearson <tpearson@raptorengineering.com>

Many thx for this, good to see you finally found the problem.

FWIW, you might want to add a

 Closes:
https://lore.kernel.org/all/480932026.45576726.1699374859845.JavaMail.zimbra@raptorengineeringinc.com/

here. Yes, I care about those tags because of regression tracking. But
it only relies on Link:/Closes: tags because they were meant to be used
in the first place to link to backstories and details of a change[1].

And you and Jens did such good debugging in that thread, which is why
it's IMHO really worth linking here in case anyone ever needs to look
into the backstory later.

> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com>
> [..]

Thx again for all your work you put into this.

Ciao, Thorsten

[1] see Documentation/process/submitting-patches.rst
(http://docs.kernel.org/process/submitting-patches.html) and
Documentation/process/5.Posting.rst
(https://docs.kernel.org/process/5.Posting.html)

See also these mails from Linus:
https://lore.kernel.org/all/CAHk-=wjMmSZzMJ3Xnskdg4+GGz=5p5p+GSYyFBTh0f-DgvdBWg@mail.gmail.com/
https://lore.kernel.org/all/CAHk-=wgs38ZrfPvy=nOwVkVzjpM3VFU1zobP37Fwd_h9iAD5JQ@mail.gmail.com/
https://lore.kernel.org/all/CAHk-=wjxzafG-=J8oT30s7upn4RhBs6TX-uVFZ5rME+L5_DoJA@mail.gmail.com/
Gabriel Paubert Nov. 19, 2023, 7:02 a.m. UTC | #3
On Sat, Nov 18, 2023 at 05:45:03PM -0600, Timothy Pearson wrote:
> During floating point and vector save to thread data fr0/vs0 are clobbered
> by the FPSCR/VSCR store routine.  This leads to userspace register corruption
> and application data corruption / crash under the following rare condition:
> 
>  * A userspace thread is executing with VSX/FP mode enabled
>  * The userspace thread is making active use of fr0 and/or vs0
>  * An IPI is taken in kernel mode, forcing the userspace thread to reschedule
>  * The userspace thread is interrupted by the IPI before accessing data it
>    previously stored in fr0/vs0
>  * The thread being switched in by the IPI has a pending signal
> 
> If these exact criteria are met, then the following sequence happens:
> 
>  * The existing thread FP storage is still valid before the IPI, due to a
>    prior call to save_fpu() or store_fp_state().  Note that the current
>    fr0/vs0 registers have been clobbered, so the FP/VSX state in registers
>    is now invalid pending a call to restore_fp()/restore_altivec().
>  * IPI -- FP/VSX register state remains invalid
>  * interrupt_exit_user_prepare_main() calls do_notify_resume(),
>    due to the pending signal
>  * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which
>    merrily reads and saves the invalid FP/VSX state to thread local storage.
>  * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid
>    FP/VSX state back to registers.
>  * Execution is released to userspace, and the application crashes or corrupts
>    data.
> 
> Without the pending signal, do_notify_resume() is never called, therefore the
> invalid register state doesn't matter as it is overwritten nearly immediately
> by interrupt_exit_user_prepare_main() calling restore_math() before return
> to userspace.
> 
> The combination of MariaDB and io_uring is especially good at triggering data
> corruption using the above sequence, see MariaDB bug MDEV-30728.
> 
> Restore fr0/vs0 after FPSCR/VSCR store has completed for both the fp and
> altivec register save paths.
> 
> Tested under QEMU in kvm mode, running on a Talos II workstation with dual
> POWER9 DD2.2 CPUs.
> 
> Tested-by: Timothy Pearson <tpearson@raptorengineering.com>
> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com>
> ---
>  arch/powerpc/kernel/fpu.S    | 13 +++++++++++++
>  arch/powerpc/kernel/vector.S |  4 ++++
>  2 files changed, 17 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
> index 6a9acfb690c9..2f8f3f93cbb6 100644
> --- a/arch/powerpc/kernel/fpu.S
> +++ b/arch/powerpc/kernel/fpu.S
> @@ -23,6 +23,15 @@
>  #include <asm/feature-fixups.h>
>  
>  #ifdef CONFIG_VSX
> +#define __REST_1FPVSR(n,c,base)						\
> +BEGIN_FTR_SECTION							\
> +	b	2f;							\
> +END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
> +	REST_FPR(n,base);						\
> +	b	3f;							\
> +2:	REST_VSR(n,c,base);						\
> +3:
> +
>  #define __REST_32FPVSRS(n,c,base)					\
>  BEGIN_FTR_SECTION							\
>  	b	2f;							\
> @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
>  2:	SAVE_32VSRS(n,c,base);						\
>  3:
>  #else
> +#define __REST_1FPVSR(n,b,base)		REST_FPR(n, base)
>  #define __REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base)
>  #define __SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)
>  #endif
> +#define REST_1FPVSR(n,c,base)   __REST_1FPVSR(n,__REG_##c,__REG_##base)
>  #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
>  #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
>  
> @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state)
>  	SAVE_32FPVSRS(0, R4, R3)
>  	mffs	fr0
>  	stfd	fr0,FPSTATE_FPSCR(r3)
> +	REST_1FPVSR(0, R4, R3)
>  	blr
>  EXPORT_SYMBOL(store_fp_state)
>  
> @@ -138,4 +150,5 @@ _GLOBAL(save_fpu)
>  2:	SAVE_32FPVSRS(0, R4, R6)
>  	mffs	fr0
>  	stfd	fr0,FPSTATE_FPSCR(r6)
> +	REST_1FPVSR(0, R4, R6)
>  	blr
> diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
> index 4094e4c4c77a..8c63b05b421e 100644
> --- a/arch/powerpc/kernel/vector.S
> +++ b/arch/powerpc/kernel/vector.S
> @@ -33,6 +33,8 @@ _GLOBAL(store_vr_state)
>  	mfvscr	v0
>  	li	r4, VRSTATE_VSCR
>  	stvx	v0, r4, r3
> +	li	r4, 0
> +	lvx	v0, r4, r3

Just a small nit: no need for clearing r4, "lvx v0,0,r3" will do, since —
as with all Power instructions using indexed addressing mode — an RA
operand of 0 means a literal zero rather than the contents of r0.

>  	blr
>  EXPORT_SYMBOL(store_vr_state)
>  
> @@ -109,6 +111,8 @@ _GLOBAL(save_altivec)
>  	mfvscr	v0
>  	li	r4,VRSTATE_VSCR
>  	stvx	v0,r4,r7
> +	li	r4,0
> +	lvx	v0,r4,r7

Ditto.

And great bug hunt, by the way...

>  	blr
>  
>  #ifdef CONFIG_VSX
> -- 
> 2.39.2

Cheers,
Gabriel
Jens Axboe Nov. 19, 2023, 1:14 p.m. UTC | #4
On 11/18/23 4:45 PM, Timothy Pearson wrote:
> During floating point and vector save to thread data fr0/vs0 are clobbered
> by the FPSCR/VSCR store routine.  This leads to userspace register corruption
> and application data corruption / crash under the following rare condition:
> 
>  * A userspace thread is executing with VSX/FP mode enabled
>  * The userspace thread is making active use of fr0 and/or vs0
>  * An IPI is taken in kernel mode, forcing the userspace thread to reschedule
>  * The userspace thread is interrupted by the IPI before accessing data it
>    previously stored in fr0/vs0
>  * The thread being switched in by the IPI has a pending signal
> 
> If these exact criteria are met, then the following sequence happens:
> 
>  * The existing thread FP storage is still valid before the IPI, due to a
>    prior call to save_fpu() or store_fp_state().  Note that the current
>    fr0/vs0 registers have been clobbered, so the FP/VSX state in registers
>    is now invalid pending a call to restore_fp()/restore_altivec().
>  * IPI -- FP/VSX register state remains invalid
>  * interrupt_exit_user_prepare_main() calls do_notify_resume(),
>    due to the pending signal
>  * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which
>    merrily reads and saves the invalid FP/VSX state to thread local storage.
>  * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid
>    FP/VSX state back to registers.
>  * Execution is released to userspace, and the application crashes or corrupts
>    data.

What an epic bug hunt! Hats off to you for seeing it through and getting
to the bottom of it. Particularly difficult as the commit that made it
easier to trigger was in no way related to where the actual bug was.

I ran this on the vm I have access to, and it survived 2x500 iterations.
Happy to call that good:

Tested-by: Jens Axboe <axboe@kernel.dk>
Salvatore Bonaccorso Nov. 29, 2023, 10:30 a.m. UTC | #5
Hi,

On Sun, Nov 19, 2023 at 06:14:50AM -0700, Jens Axboe wrote:
> On 11/18/23 4:45 PM, Timothy Pearson wrote:
> > During floating point and vector save to thread data fr0/vs0 are clobbered
> > by the FPSCR/VSCR store routine.  This leads to userspace register corruption
> > and application data corruption / crash under the following rare condition:
> > 
> >  * A userspace thread is executing with VSX/FP mode enabled
> >  * The userspace thread is making active use of fr0 and/or vs0
> >  * An IPI is taken in kernel mode, forcing the userspace thread to reschedule
> >  * The userspace thread is interrupted by the IPI before accessing data it
> >    previously stored in fr0/vs0
> >  * The thread being switched in by the IPI has a pending signal
> > 
> > If these exact criteria are met, then the following sequence happens:
> > 
> >  * The existing thread FP storage is still valid before the IPI, due to a
> >    prior call to save_fpu() or store_fp_state().  Note that the current
> >    fr0/vs0 registers have been clobbered, so the FP/VSX state in registers
> >    is now invalid pending a call to restore_fp()/restore_altivec().
> >  * IPI -- FP/VSX register state remains invalid
> >  * interrupt_exit_user_prepare_main() calls do_notify_resume(),
> >    due to the pending signal
> >  * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which
> >    merrily reads and saves the invalid FP/VSX state to thread local storage.
> >  * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid
> >    FP/VSX state back to registers.
> >  * Execution is released to userspace, and the application crashes or corrupts
> >    data.
> 
> What an epic bug hunt! Hats off to you for seeing it through and getting
> to the bottom of it. Particularly difficult as the commit that made it
> easier to trigger was in no way related to where the actual bug was.
> 
> I ran this on the vm I have access to, and it survived 2x500 iterations.
> Happy to call that good:
> 
> Tested-by: Jens Axboe <axboe@kernel.dk>

Thanks to all involved!

Is this going to land soon in mainline so it can be picked as well for
the affected stable trees?

Regards,
Salvatore
Christophe Leroy Nov. 29, 2023, 10:47 a.m. UTC | #6
Le 29/11/2023 à 11:30, Salvatore Bonaccorso a écrit :
> Hi,
> 
> On Sun, Nov 19, 2023 at 06:14:50AM -0700, Jens Axboe wrote:
>> On 11/18/23 4:45 PM, Timothy Pearson wrote:
>>> During floating point and vector save to thread data fr0/vs0 are clobbered
>>> by the FPSCR/VSCR store routine.  This leads to userspace register corruption
>>> and application data corruption / crash under the following rare condition:
>>>
>>>   * A userspace thread is executing with VSX/FP mode enabled
>>>   * The userspace thread is making active use of fr0 and/or vs0
>>>   * An IPI is taken in kernel mode, forcing the userspace thread to reschedule
>>>   * The userspace thread is interrupted by the IPI before accessing data it
>>>     previously stored in fr0/vs0
>>>   * The thread being switched in by the IPI has a pending signal
>>>
>>> If these exact criteria are met, then the following sequence happens:
>>>
>>>   * The existing thread FP storage is still valid before the IPI, due to a
>>>     prior call to save_fpu() or store_fp_state().  Note that the current
>>>     fr0/vs0 registers have been clobbered, so the FP/VSX state in registers
>>>     is now invalid pending a call to restore_fp()/restore_altivec().
>>>   * IPI -- FP/VSX register state remains invalid
>>>   * interrupt_exit_user_prepare_main() calls do_notify_resume(),
>>>     due to the pending signal
>>>   * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which
>>>     merrily reads and saves the invalid FP/VSX state to thread local storage.
>>>   * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid
>>>     FP/VSX state back to registers.
>>>   * Execution is released to userspace, and the application crashes or corrupts
>>>     data.
>>
>> What an epic bug hunt! Hats off to you for seeing it through and getting
>> to the bottom of it. Particularly difficult as the commit that made it
>> easier to trigger was in no way related to where the actual bug was.
>>
>> I ran this on the vm I have access to, and it survived 2x500 iterations.
>> Happy to call that good:
>>
>> Tested-by: Jens Axboe <axboe@kernel.dk>
> 
> Thanks to all involved!
> 
> Is this going to land soon in mainline so it can be picked as well for
> the affected stable trees?
> 

This version of the patch has been superseded.

As said by Michael in the relevant thread, the plan is to have version 2 
of this patch in 6.7-rc4, see 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/1921539696.48534988.1700407082933.JavaMail.zimbra@raptorengineeringinc.com/

Christophe
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 6a9acfb690c9..2f8f3f93cbb6 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -23,6 +23,15 @@ 
 #include <asm/feature-fixups.h>
 
 #ifdef CONFIG_VSX
+#define __REST_1FPVSR(n,c,base)						\
+BEGIN_FTR_SECTION							\
+	b	2f;							\
+END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
+	REST_FPR(n,base);						\
+	b	3f;							\
+2:	REST_VSR(n,c,base);						\
+3:
+
 #define __REST_32FPVSRS(n,c,base)					\
 BEGIN_FTR_SECTION							\
 	b	2f;							\
@@ -41,9 +50,11 @@  END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
 2:	SAVE_32VSRS(n,c,base);						\
 3:
 #else
+#define __REST_1FPVSR(n,b,base)		REST_FPR(n, base)
 #define __REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base)
 #define __SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)
 #endif
+#define REST_1FPVSR(n,c,base)   __REST_1FPVSR(n,__REG_##c,__REG_##base)
 #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
 #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
 
@@ -67,6 +78,7 @@  _GLOBAL(store_fp_state)
 	SAVE_32FPVSRS(0, R4, R3)
 	mffs	fr0
 	stfd	fr0,FPSTATE_FPSCR(r3)
+	REST_1FPVSR(0, R4, R3)
 	blr
 EXPORT_SYMBOL(store_fp_state)
 
@@ -138,4 +150,5 @@  _GLOBAL(save_fpu)
 2:	SAVE_32FPVSRS(0, R4, R6)
 	mffs	fr0
 	stfd	fr0,FPSTATE_FPSCR(r6)
+	REST_1FPVSR(0, R4, R6)
 	blr
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 4094e4c4c77a..8c63b05b421e 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -33,6 +33,8 @@  _GLOBAL(store_vr_state)
 	mfvscr	v0
 	li	r4, VRSTATE_VSCR
 	stvx	v0, r4, r3
+	li	r4, 0
+	lvx	v0, r4, r3
 	blr
 EXPORT_SYMBOL(store_vr_state)
 
@@ -109,6 +111,8 @@  _GLOBAL(save_altivec)
 	mfvscr	v0
 	li	r4,VRSTATE_VSCR
 	stvx	v0,r4,r7
+	li	r4,0
+	lvx	v0,r4,r7
 	blr
 
 #ifdef CONFIG_VSX