diff mbox series

[v8,1/2] x86-64: Save APX registers in ld.so trampoline

Message ID 20240216151711.2742988-2-hjl.tools@gmail.com
State New
Headers show
Series x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers | expand

Commit Message

H.J. Lu Feb. 16, 2024, 3:17 p.m. UTC
Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
ld.so trampoline.  This fixes BZ #31371.

Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
be used by i386 _dl_tlsdesc_dynamic.
---
 sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 6 deletions(-)

Comments

Noah Goldstein Feb. 24, 2024, 7:01 p.m. UTC | #1
On Fri, Feb 16, 2024 at 9:17 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
> ld.so trampoline.  This fixes BZ #31371.
>
> Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
> be used by i386 _dl_tlsdesc_dynamic.
> ---
>  sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 46 insertions(+), 6 deletions(-)
>
> diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> index 85d0a8c943..837fd28734 100644
> --- a/sysdeps/x86/sysdep.h
> +++ b/sysdeps/x86/sysdep.h
> @@ -21,14 +21,54 @@
>
>  #include <sysdeps/generic/sysdep.h>
>
> +/* The extended state feature IDs in the state component bitmap.  */
> +#define X86_XSTATE_X87_ID      0
> +#define X86_XSTATE_SSE_ID      1
> +#define X86_XSTATE_AVX_ID      2
> +#define X86_XSTATE_BNDREGS_ID  3
> +#define X86_XSTATE_BNDCFG_ID   4
> +#define X86_XSTATE_K_ID                5
> +#define X86_XSTATE_ZMM_H_ID    6
> +#define X86_XSTATE_ZMM_ID      7
> +#define X86_XSTATE_PKRU_ID     9
> +#define X86_XSTATE_TILECFG_ID  17
> +#define X86_XSTATE_TILEDATA_ID 18
> +#define X86_XSTATE_APX_F_ID    19
> +
> +#ifdef __x86_64__
>  /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
>     space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
> -   aligned to 16 bytes for fxsave and 64 bytes for xsave.  */
> -#define STATE_SAVE_OFFSET (8 * 7 + 8)
> -
> -/* Save SSE, AVX, AVX512, mask and bound registers.  */
> -#define STATE_SAVE_MASK \
> -  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
> +   aligned to 16 bytes for fxsave and 64 bytes for xsave.
> +
> +   NB: Is is non-zero because of the 128-byte red-zone.  Some registers
> +   are saved on stack without adjusting stack pointer first.  When we
> +   update stack pointer to allocate more space, we need to take the
> +   red-zone into account.  */
> +# define STATE_SAVE_OFFSET (8 * 7 + 8)
> +
> +/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
> +   registers are mutually exclusive.  */
> +# define STATE_SAVE_MASK               \
> +  ((1 << X86_XSTATE_SSE_ID)            \
> +   | (1 << X86_XSTATE_AVX_ID)          \
> +   | (1 << X86_XSTATE_BNDREGS_ID)      \
> +   | (1 << X86_XSTATE_K_ID)            \
> +   | (1 << X86_XSTATE_ZMM_H_ID)        \
> +   | (1 << X86_XSTATE_ZMM_ID)          \
> +   | (1 << X86_XSTATE_APX_F_ID))
> +#else
> +/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
> +   doesn't have red-zone, use 0 here.  */
> +# define STATE_SAVE_OFFSET 0
> +
> +/* Save SSE, AVX, AXV512, mask and bound registers.   */
> +# define STATE_SAVE_MASK               \
> +  ((1 << X86_XSTATE_SSE_ID)            \
> +   | (1 << X86_XSTATE_AVX_ID)          \
> +   | (1 << X86_XSTATE_BNDREGS_ID)      \
> +   | (1 << X86_XSTATE_K_ID)            \
> +   | (1 << X86_XSTATE_ZMM_H_ID))
> +#endif
>
>  /* Constants for bits in __x86_string_control:  */
>
> --
> 2.43.0
>


LGTM.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
H.J. Lu March 8, 2024, 8:09 p.m. UTC | #2
On Sat, Feb 24, 2024 at 11:01 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Feb 16, 2024 at 9:17 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
> > ld.so trampoline.  This fixes BZ #31371.
> >
> > Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
> > be used by i386 _dl_tlsdesc_dynamic.
> > ---
> >  sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 46 insertions(+), 6 deletions(-)
> >
> > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > index 85d0a8c943..837fd28734 100644
> > --- a/sysdeps/x86/sysdep.h
> > +++ b/sysdeps/x86/sysdep.h
> > @@ -21,14 +21,54 @@
> >
> >  #include <sysdeps/generic/sysdep.h>
> >
> > +/* The extended state feature IDs in the state component bitmap.  */
> > +#define X86_XSTATE_X87_ID      0
> > +#define X86_XSTATE_SSE_ID      1
> > +#define X86_XSTATE_AVX_ID      2
> > +#define X86_XSTATE_BNDREGS_ID  3
> > +#define X86_XSTATE_BNDCFG_ID   4
> > +#define X86_XSTATE_K_ID                5
> > +#define X86_XSTATE_ZMM_H_ID    6
> > +#define X86_XSTATE_ZMM_ID      7
> > +#define X86_XSTATE_PKRU_ID     9
> > +#define X86_XSTATE_TILECFG_ID  17
> > +#define X86_XSTATE_TILEDATA_ID 18
> > +#define X86_XSTATE_APX_F_ID    19
> > +
> > +#ifdef __x86_64__
> >  /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
> >     space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
> > -   aligned to 16 bytes for fxsave and 64 bytes for xsave.  */
> > -#define STATE_SAVE_OFFSET (8 * 7 + 8)
> > -
> > -/* Save SSE, AVX, AVX512, mask and bound registers.  */
> > -#define STATE_SAVE_MASK \
> > -  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
> > +   aligned to 16 bytes for fxsave and 64 bytes for xsave.
> > +
> > +   NB: Is is non-zero because of the 128-byte red-zone.  Some registers
> > +   are saved on stack without adjusting stack pointer first.  When we
> > +   update stack pointer to allocate more space, we need to take the
> > +   red-zone into account.  */
> > +# define STATE_SAVE_OFFSET (8 * 7 + 8)
> > +
> > +/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
> > +   registers are mutually exclusive.  */
> > +# define STATE_SAVE_MASK               \
> > +  ((1 << X86_XSTATE_SSE_ID)            \
> > +   | (1 << X86_XSTATE_AVX_ID)          \
> > +   | (1 << X86_XSTATE_BNDREGS_ID)      \
> > +   | (1 << X86_XSTATE_K_ID)            \
> > +   | (1 << X86_XSTATE_ZMM_H_ID)        \
> > +   | (1 << X86_XSTATE_ZMM_ID)          \
> > +   | (1 << X86_XSTATE_APX_F_ID))
> > +#else
> > +/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
> > +   doesn't have red-zone, use 0 here.  */
> > +# define STATE_SAVE_OFFSET 0
> > +
> > +/* Save SSE, AVX, AXV512, mask and bound registers.   */
> > +# define STATE_SAVE_MASK               \
> > +  ((1 << X86_XSTATE_SSE_ID)            \
> > +   | (1 << X86_XSTATE_AVX_ID)          \
> > +   | (1 << X86_XSTATE_BNDREGS_ID)      \
> > +   | (1 << X86_XSTATE_K_ID)            \
> > +   | (1 << X86_XSTATE_ZMM_H_ID))
> > +#endif
> >
> >  /* Constants for bits in __x86_string_control:  */
> >
> > --
> > 2.43.0
> >
>
>
> LGTM.
> Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>

OK to backport it to release branches?

Thanks.
Noah Goldstein March 9, 2024, 5:39 p.m. UTC | #3
On Fri, Mar 8, 2024 at 2:09 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Sat, Feb 24, 2024 at 11:01 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Fri, Feb 16, 2024 at 9:17 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
> > > ld.so trampoline.  This fixes BZ #31371.
> > >
> > > Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
> > > be used by i386 _dl_tlsdesc_dynamic.
> > > ---
> > >  sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
> > >  1 file changed, 46 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > > index 85d0a8c943..837fd28734 100644
> > > --- a/sysdeps/x86/sysdep.h
> > > +++ b/sysdeps/x86/sysdep.h
> > > @@ -21,14 +21,54 @@
> > >
> > >  #include <sysdeps/generic/sysdep.h>
> > >
> > > +/* The extended state feature IDs in the state component bitmap.  */
> > > +#define X86_XSTATE_X87_ID      0
> > > +#define X86_XSTATE_SSE_ID      1
> > > +#define X86_XSTATE_AVX_ID      2
> > > +#define X86_XSTATE_BNDREGS_ID  3
> > > +#define X86_XSTATE_BNDCFG_ID   4
> > > +#define X86_XSTATE_K_ID                5
> > > +#define X86_XSTATE_ZMM_H_ID    6
> > > +#define X86_XSTATE_ZMM_ID      7
> > > +#define X86_XSTATE_PKRU_ID     9
> > > +#define X86_XSTATE_TILECFG_ID  17
> > > +#define X86_XSTATE_TILEDATA_ID 18
> > > +#define X86_XSTATE_APX_F_ID    19
> > > +
> > > +#ifdef __x86_64__
> > >  /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
> > >     space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
> > > -   aligned to 16 bytes for fxsave and 64 bytes for xsave.  */
> > > -#define STATE_SAVE_OFFSET (8 * 7 + 8)
> > > -
> > > -/* Save SSE, AVX, AVX512, mask and bound registers.  */
> > > -#define STATE_SAVE_MASK \
> > > -  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
> > > +   aligned to 16 bytes for fxsave and 64 bytes for xsave.
> > > +
> > > +   NB: Is is non-zero because of the 128-byte red-zone.  Some registers
> > > +   are saved on stack without adjusting stack pointer first.  When we
> > > +   update stack pointer to allocate more space, we need to take the
> > > +   red-zone into account.  */
> > > +# define STATE_SAVE_OFFSET (8 * 7 + 8)
> > > +
> > > +/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
> > > +   registers are mutually exclusive.  */
> > > +# define STATE_SAVE_MASK               \
> > > +  ((1 << X86_XSTATE_SSE_ID)            \
> > > +   | (1 << X86_XSTATE_AVX_ID)          \
> > > +   | (1 << X86_XSTATE_BNDREGS_ID)      \
> > > +   | (1 << X86_XSTATE_K_ID)            \
> > > +   | (1 << X86_XSTATE_ZMM_H_ID)        \
> > > +   | (1 << X86_XSTATE_ZMM_ID)          \
> > > +   | (1 << X86_XSTATE_APX_F_ID))
> > > +#else
> > > +/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
> > > +   doesn't have red-zone, use 0 here.  */
> > > +# define STATE_SAVE_OFFSET 0
> > > +
> > > +/* Save SSE, AVX, AXV512, mask and bound registers.   */
> > > +# define STATE_SAVE_MASK               \
> > > +  ((1 << X86_XSTATE_SSE_ID)            \
> > > +   | (1 << X86_XSTATE_AVX_ID)          \
> > > +   | (1 << X86_XSTATE_BNDREGS_ID)      \
> > > +   | (1 << X86_XSTATE_K_ID)            \
> > > +   | (1 << X86_XSTATE_ZMM_H_ID))
> > > +#endif
> > >
> > >  /* Constants for bits in __x86_string_control:  */
> > >
> > > --
> > > 2.43.0
> > >
> >
> >
> > LGTM.
> > Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
>
> OK to backport it to release branches?
>
> Thanks.

Okay with it, but imo this type of system level change should have
a bit longer of being tested in the realworld before backport.
Maybe give it another week first?
>
> --
> H.J.
diff mbox series

Patch

diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index 85d0a8c943..837fd28734 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -21,14 +21,54 @@ 
 
 #include <sysdeps/generic/sysdep.h>
 
+/* The extended state feature IDs in the state component bitmap.  */
+#define X86_XSTATE_X87_ID	0
+#define X86_XSTATE_SSE_ID	1
+#define X86_XSTATE_AVX_ID	2
+#define X86_XSTATE_BNDREGS_ID	3
+#define X86_XSTATE_BNDCFG_ID	4
+#define X86_XSTATE_K_ID		5
+#define X86_XSTATE_ZMM_H_ID	6
+#define X86_XSTATE_ZMM_ID	7
+#define X86_XSTATE_PKRU_ID	9
+#define X86_XSTATE_TILECFG_ID	17
+#define X86_XSTATE_TILEDATA_ID	18
+#define X86_XSTATE_APX_F_ID	19
+
+#ifdef __x86_64__
 /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
    space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
-   aligned to 16 bytes for fxsave and 64 bytes for xsave.  */
-#define STATE_SAVE_OFFSET (8 * 7 + 8)
-
-/* Save SSE, AVX, AVX512, mask and bound registers.  */
-#define STATE_SAVE_MASK \
-  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
+   aligned to 16 bytes for fxsave and 64 bytes for xsave.
+
+   NB: It is non-zero because of the 128-byte red-zone.  Some registers
+   are saved on stack without adjusting stack pointer first.  When we
+   update stack pointer to allocate more space, we need to take the
+   red-zone into account.  */
+# define STATE_SAVE_OFFSET (8 * 7 + 8)
+
+/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
+   registers are mutually exclusive.  */
+# define STATE_SAVE_MASK		\
+  ((1 << X86_XSTATE_SSE_ID)		\
+   | (1 << X86_XSTATE_AVX_ID)		\
+   | (1 << X86_XSTATE_BNDREGS_ID)	\
+   | (1 << X86_XSTATE_K_ID)		\
+   | (1 << X86_XSTATE_ZMM_H_ID) 	\
+   | (1 << X86_XSTATE_ZMM_ID)		\
+   | (1 << X86_XSTATE_APX_F_ID))
+#else
+/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
+   doesn't have red-zone, use 0 here.  */
+# define STATE_SAVE_OFFSET 0
+
+/* Save SSE, AVX, AVX512, mask and bound registers.  */
+# define STATE_SAVE_MASK		\
+  ((1 << X86_XSTATE_SSE_ID)		\
+   | (1 << X86_XSTATE_AVX_ID)		\
+   | (1 << X86_XSTATE_BNDREGS_ID)	\
+   | (1 << X86_XSTATE_K_ID)		\
+   | (1 << X86_XSTATE_ZMM_H_ID))
+#endif
 
 /* Constants for bits in __x86_string_control:  */