Message ID | 20240318134016.820218-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v6] x86-64: Allocate state buffer space for RDI, RSI and RBX | expand |
On Mon, Mar 18, 2024 at 6:40 AM H.J. Lu <hjl.tools@gmail.com> wrote: > _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack. > After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11. Define > TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX > to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to > STATE_SAVE_OFFSET(%rsp). > > +==================+<- stack frame start aligned at 8 or 16 bytes > | |<- RDI saved in the red zone > | |<- RSI saved in the red zone > | |<- RBX saved in the red zone > | |<- paddings for stack realignment of 64 bytes > |------------------|<- xsave buffer end aligned at 64 bytes > | |<- > | |<- > | |<- > |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp) > | |<- 8-byte padding for 64-byte alignment > | |<- 8-byte padding for 64-byte alignment > | |<- R11 > | |<- R10 > | |<- R9 > | |<- R8 > | |<- RDX > | |<- RCX > +==================+<- RSP aligned at 64 bytes > > Define TLSDESC_CALL_REGISTER_SAVE_AREA, the total register save area size > for all integer registers by adding 24 to STATE_SAVE_OFFSET since RDI, RSI > and RBX are saved onto stack without adjusting stack pointer first, using > the red-zone. This fixes BZ #31501. > --- > sysdeps/x86/cpu-features.c | 11 ++-- > sysdeps/x86/sysdep.h | 60 ++++++++++++++++++--- > sysdeps/x86_64/tst-gnu2-tls2mod1.S | 87 ++++++++++++++++++++++++++++++ > 3 files changed, 147 insertions(+), 11 deletions(-) > create mode 100644 sysdeps/x86_64/tst-gnu2-tls2mod1.S > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c > index 4ea373dffa..3d7c2819d7 100644 > --- a/sysdeps/x86/cpu-features.c > +++ b/sysdeps/x86/cpu-features.c > @@ -311,7 +311,7 @@ update_active (struct cpu_features *cpu_features) > /* NB: On AMX capable processors, ebx always includes AMX > states. */ > unsigned int xsave_state_full_size > - = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); > + = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64); > > cpu_features->xsave_state_size > = xsave_state_full_size; > @@ -401,8 +401,10 @@ update_active (struct cpu_features *cpu_features) > unsigned int amx_size > = (xstate_amx_comp_offsets[31] > + xstate_amx_comp_sizes[31]); > - amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET, > - 64); > + amx_size > + = ALIGN_UP ((amx_size > + + TLSDESC_CALL_REGISTER_SAVE_AREA), > + 64); > /* Set xsave_state_full_size to the compact AMX > state size for XSAVEC. NB: xsave_state_full_size > is only used in _dl_tlsdesc_dynamic_xsave and > @@ -410,7 +412,8 @@ update_active (struct cpu_features *cpu_features) > cpu_features->xsave_state_full_size = amx_size; > #endif > cpu_features->xsave_state_size > - = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); > + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, > + 64); > CPU_FEATURE_SET (cpu_features, XSAVEC); > } > } > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h > index db8e576e91..7359149e17 100644 > --- a/sysdeps/x86/sysdep.h > +++ b/sysdeps/x86/sysdep.h > @@ -38,14 +38,59 @@ > #ifdef __x86_64__ > /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need > space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be > - aligned to 16 bytes for fxsave and 64 bytes for xsave. > - > - NB: Is is non-zero because of the 128-byte red-zone. Some registers > - are saved on stack without adjusting stack pointer first. When we > - update stack pointer to allocate more space, we need to take the > - red-zone into account. */ > + aligned to 16 bytes for fxsave and 64 bytes for xsave. It is non-zero > + because MOV, instead of PUSH, is used to save registers onto stack. > + > + +==================+<- stack frame start aligned at 8 or 16 bytes > + | |<- paddings for stack realignment of 64 bytes > + |------------------|<- xsave buffer end aligned at 64 bytes > + | |<- > + | |<- > + | |<- > + |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp) > + | |<- 8-byte padding for 64-byte alignment > + | |<- R9 > + | |<- R8 > + | |<- RDI > + | |<- RSI > + | |<- RDX > + | |<- RCX > + | |<- RAX > + +==================+<- RSP aligned at 64 bytes > + > + */ > # define STATE_SAVE_OFFSET (8 * 7 + 8) > > +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning > + stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and > + R11. Allocate space for RDI, RSI and RBX to avoid clobbering saved > + RDI, RSI and RBX values on stack by xsave. > + > + +==================+<- stack frame start aligned at 8 or 16 bytes > + | |<- RDI saved in the red zone > + | |<- RSI saved in the red zone > + | |<- RBX saved in the red zone > + | |<- paddings for stack realignment of 64 bytes > + |------------------|<- xsave buffer end aligned at 64 bytes > + | |<- > + | |<- > + | |<- > + |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp) > + | |<- 8-byte padding for 64-byte alignment > + | |<- 8-byte padding for 64-byte alignment > + | |<- R11 > + | |<- R10 > + | |<- R9 > + | |<- R8 > + | |<- RDX > + | |<- RCX > + +==================+<- RSP aligned at 64 bytes > + > + Define the total register save area size for all integer registers by > + adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto > + stack without adjusting stack pointer first, using the red-zone. */ > +# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24) > + > /* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX > registers are mutually exclusive. */ > # define STATE_SAVE_MASK \ > @@ -66,8 +111,9 @@ > (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK) > #else > /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386 > - doesn't have red-zone, use 0 here. */ > + uses PUSH to save registers onto stack, use 0 here. */ > # define STATE_SAVE_OFFSET 0 > +# define TLSDESC_CALL_REGISTER_SAVE_AREA 0 > > /* Save SSE, AVX, AXV512, mask and bound registers. */ > # define STATE_SAVE_MASK \ > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S > b/sysdeps/x86_64/tst-gnu2-tls2mod1.S > new file mode 100644 > index 0000000000..1d636669ba > --- /dev/null > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S > @@ -0,0 +1,87 @@ > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx. > + Copyright (C) 2024 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> > + > +/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec > + to clobber %rdi, %rsi and %rbx. On Intel AVX CPUs, the state size > + is 960 bytes and this test didn't fail. It may be due to the unused > + last 128 bytes. On AMD AVX CPUs, the state size is 832 bytes and > + this test might fail without the fix. */ > +#ifndef OFFSET > +# define OFFSET 40 > +#endif > + > + .text > + .p2align 4 > + .globl apply_tls > + .type apply_tls, @function > +apply_tls: > + cfi_startproc > + _CET_ENDBR > + pushq %rbp > + cfi_def_cfa_offset (16) > + cfi_offset (6, -16) > + movdqu (%RDI_LP), %xmm0 > + lea tls_var1@TLSDESC(%rip), %RAX_LP > + mov %RSP_LP, %RBP_LP > + cfi_def_cfa_register (6) > + /* Align stack to 64 bytes. */ > + and $-64, %RSP_LP > + sub $OFFSET, %RSP_LP > + pushq %rbx > + /* Set %ebx to 0xbadbeef. */ > + movl $0xbadbeef, %ebx > + movl $0xbadbeef, %esi > + movq %rdi, saved_rdi(%rip) > + movq %rsi, saved_rsi(%rip) > + call *tls_var1@TLSCALL(%RAX_LP) > + /* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx. */ > + cmpq saved_rdi(%rip), %rdi > + jne L(hlt) > + cmpq saved_rsi(%rip), %rsi > + jne L(hlt) > + cmpl $0xbadbeef, %ebx > + jne L(hlt) > + add %fs:0, %RAX_LP > + movups %xmm0, 32(%RAX_LP) > + movdqu 16(%RDI_LP), %xmm1 > + mov %RAX_LP, %RBX_LP > + movups %xmm1, 48(%RAX_LP) > + lea 32(%RBX_LP), %RAX_LP > + pop %rbx > + leave > + cfi_def_cfa (7, 8) > + ret > +L(hlt): > + hlt > + cfi_endproc > + .size apply_tls, .-apply_tls > + .hidden tls_var1 > + .globl tls_var1 > + .section .tbss,"awT",@nobits > + .align 16 > + .type tls_var1, @object > + .size tls_var1, 3200 > +tls_var1: > + .zero 3200 > + .local saved_rdi > + .comm saved_rdi,8,8 > + .local saved_rsi > + .comm saved_rsi,8,8 > + .section .note.GNU-stack,"",@progbits > -- > 2.44.0 > > LGTM Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
On Mär 18 2024, H.J. Lu wrote: > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S > new file mode 100644 > index 0000000000..1d636669ba > --- /dev/null > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S > @@ -0,0 +1,87 @@ > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx. > + Copyright (C) 2024 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> A testsuite module cannot include non-installed headers.
On Tue, Mar 19, 2024 at 1:31 AM Andreas Schwab <schwab@suse.de> wrote: > > On Mär 18 2024, H.J. Lu wrote: > > > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S > > new file mode 100644 > > index 0000000000..1d636669ba > > --- /dev/null > > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S > > @@ -0,0 +1,87 @@ > > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx. > > + Copyright (C) 2024 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <http://www.gnu.org/licenses/>. */ > > + > > +#include <sysdep.h> > > A testsuite module cannot include non-installed headers. > This is a very special case. Does it cause any issues?
On Mär 19 2024, H.J. Lu wrote: > On Tue, Mar 19, 2024 at 1:31 AM Andreas Schwab <schwab@suse.de> wrote: >> >> On Mär 18 2024, H.J. Lu wrote: >> >> > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S >> > new file mode 100644 >> > index 0000000000..1d636669ba >> > --- /dev/null >> > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S >> > @@ -0,0 +1,87 @@ >> > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx. >> > + Copyright (C) 2024 Free Software Foundation, Inc. >> > + This file is part of the GNU C Library. >> > + >> > + The GNU C Library is free software; you can redistribute it and/or >> > + modify it under the terms of the GNU Lesser General Public >> > + License as published by the Free Software Foundation; either >> > + version 2.1 of the License, or (at your option) any later version. >> > + >> > + The GNU C Library is distributed in the hope that it will be useful, >> > + but WITHOUT ANY WARRANTY; without even the implied warranty of >> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> > + Lesser General Public License for more details. >> > + >> > + You should have received a copy of the GNU Lesser General Public >> > + License along with the GNU C Library; if not, see >> > + <http://www.gnu.org/licenses/>. */ >> > + >> > +#include <sysdep.h> >> >> A testsuite module cannot include non-installed headers. >> > > This is a very special case. Does it cause any issues? sysdep.h generally depends on definitions in config.h.
On Tue, Mar 19, 2024 at 4:22 AM Andreas Schwab <schwab@suse.de> wrote: > > On Mär 19 2024, H.J. Lu wrote: > > > On Tue, Mar 19, 2024 at 1:31 AM Andreas Schwab <schwab@suse.de> wrote: > >> > >> On Mär 18 2024, H.J. Lu wrote: > >> > >> > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S > >> > new file mode 100644 > >> > index 0000000000..1d636669ba > >> > --- /dev/null > >> > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S > >> > @@ -0,0 +1,87 @@ > >> > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx. > >> > + Copyright (C) 2024 Free Software Foundation, Inc. > >> > + This file is part of the GNU C Library. > >> > + > >> > + The GNU C Library is free software; you can redistribute it and/or > >> > + modify it under the terms of the GNU Lesser General Public > >> > + License as published by the Free Software Foundation; either > >> > + version 2.1 of the License, or (at your option) any later version. > >> > + > >> > + The GNU C Library is distributed in the hope that it will be useful, > >> > + but WITHOUT ANY WARRANTY; without even the implied warranty of > >> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > >> > + Lesser General Public License for more details. > >> > + > >> > + You should have received a copy of the GNU Lesser General Public > >> > + License along with the GNU C Library; if not, see > >> > + <http://www.gnu.org/licenses/>. */ > >> > + > >> > +#include <sysdep.h> > >> > >> A testsuite module cannot include non-installed headers. > >> > > > > This is a very special case. Does it cause any issues? > > sysdep.h generally depends on definitions in config.h. > tst-gnu2-tls2mod1.S uses RXX_LP, cfi_xxx and L from sysdep.h. Do they depend on config.h?
On Mär 19 2024, H.J. Lu wrote: > tst-gnu2-tls2mod1.S uses RXX_LP, cfi_xxx and L from > sysdep.h. Do they depend on config.h? No, but other parts may depend on it that can cause compilation errors.
On Tue, Mar 19, 2024 at 4:46 AM Andreas Schwab <schwab@suse.de> wrote: > > On Mär 19 2024, H.J. Lu wrote: > > > tst-gnu2-tls2mod1.S uses RXX_LP, cfi_xxx and L from > > sysdep.h. Do they depend on config.h? > > No, but other parts may depend on it that can cause compilation errors. > Which configure options will cause compilation errors?
On Mär 19 2024, H.J. Lu wrote: > On Tue, Mar 19, 2024 at 4:46 AM Andreas Schwab <schwab@suse.de> wrote: >> >> On Mär 19 2024, H.J. Lu wrote: >> >> > tst-gnu2-tls2mod1.S uses RXX_LP, cfi_xxx and L from >> > sysdep.h. Do they depend on config.h? >> >> No, but other parts may depend on it that can cause compilation errors. >> > > Which configure options will cause compilation errors? It's not upstream, you can see it here: https://build.opensuse.org/package/live_build_log/home:Andreas_Schwab:glibc/glibc:testsuite/f/x86_64
On Tue, Mar 19, 2024 at 5:03 AM Andreas Schwab <schwab@suse.de> wrote: > > On Mär 19 2024, H.J. Lu wrote: > > > On Tue, Mar 19, 2024 at 4:46 AM Andreas Schwab <schwab@suse.de> wrote: > >> > >> On Mär 19 2024, H.J. Lu wrote: > >> > >> > tst-gnu2-tls2mod1.S uses RXX_LP, cfi_xxx and L from > >> > sysdep.h. Do they depend on config.h? > >> > >> No, but other parts may depend on it that can cause compilation errors. > >> > > > > Which configure options will cause compilation errors? > > It's not upstream, you can see it here: > > https://build.opensuse.org/package/live_build_log/home:Andreas_Schwab:glibc/glibc:testsuite/f/x86_64 > Please open a glibc bug report with steps to reproduce the issue.
* Andreas Schwab: > On Mär 19 2024, H.J. Lu wrote: > >> On Tue, Mar 19, 2024 at 1:31 AM Andreas Schwab <schwab@suse.de> wrote: >>> >>> On Mär 18 2024, H.J. Lu wrote: >>> >>> > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S >>> > new file mode 100644 >>> > index 0000000000..1d636669ba >>> > --- /dev/null >>> > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S >>> > @@ -0,0 +1,87 @@ >>> > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx. >>> > + Copyright (C) 2024 Free Software Foundation, Inc. >>> > + This file is part of the GNU C Library. >>> > + >>> > + The GNU C Library is free software; you can redistribute it and/or >>> > + modify it under the terms of the GNU Lesser General Public >>> > + License as published by the Free Software Foundation; either >>> > + version 2.1 of the License, or (at your option) any later version. >>> > + >>> > + The GNU C Library is distributed in the hope that it will be useful, >>> > + but WITHOUT ANY WARRANTY; without even the implied warranty of >>> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>> > + Lesser General Public License for more details. >>> > + >>> > + You should have received a copy of the GNU Lesser General Public >>> > + License along with the GNU C Library; if not, see >>> > + <http://www.gnu.org/licenses/>. */ >>> > + >>> > +#include <sysdep.h> >>> >>> A testsuite module cannot include non-installed headers. >>> >> >> This is a very special case. Does it cause any issues? > > sysdep.h generally depends on definitions in config.h. It seems to be a downstream-specific patch: [ 1518s] In file included from ../sysdeps/unix/x86_64/sysdep.h:19, [ 1518s] from ../sysdeps/unix/sysv/linux/x86_64/sysdep.h:23, [ 1518s] from ../sysdeps/x86_64/tst-gnu2-tls2mod1.S:19: [ 1518s] ../sysdeps/x86_64/sysdep.h:56:5: error: "ENABLE_USERSPACE_LIVEPATCH" is not defined, evaluates to 0 [-Werror=undef] [ 1518s] 56 | #if ENABLE_USERSPACE_LIVEPATCH [ 1518s] | ^~~~~~~~~~~~~~~~~~~~~~~~~~ [ 1518s] cc1: some warnings being treated as errors I think the module using <sysdep.h> needs to be added to the test-internal-extras Makefile variable. Thanks, Florian
On Tue, Mar 19, 2024 at 5:17 AM Florian Weimer <fweimer@redhat.com> wrote: > > * Andreas Schwab: > > > On Mär 19 2024, H.J. Lu wrote: > > > >> On Tue, Mar 19, 2024 at 1:31 AM Andreas Schwab <schwab@suse.de> wrote: > >>> > >>> On Mär 18 2024, H.J. Lu wrote: > >>> > >>> > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S > >>> > new file mode 100644 > >>> > index 0000000000..1d636669ba > >>> > --- /dev/null > >>> > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S > >>> > @@ -0,0 +1,87 @@ > >>> > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx. > >>> > + Copyright (C) 2024 Free Software Foundation, Inc. > >>> > + This file is part of the GNU C Library. > >>> > + > >>> > + The GNU C Library is free software; you can redistribute it and/or > >>> > + modify it under the terms of the GNU Lesser General Public > >>> > + License as published by the Free Software Foundation; either > >>> > + version 2.1 of the License, or (at your option) any later version. > >>> > + > >>> > + The GNU C Library is distributed in the hope that it will be useful, > >>> > + but WITHOUT ANY WARRANTY; without even the implied warranty of > >>> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > >>> > + Lesser General Public License for more details. > >>> > + > >>> > + You should have received a copy of the GNU Lesser General Public > >>> > + License along with the GNU C Library; if not, see > >>> > + <http://www.gnu.org/licenses/>. */ > >>> > + > >>> > +#include <sysdep.h> > >>> > >>> A testsuite module cannot include non-installed headers. > >>> > >> > >> This is a very special case. Does it cause any issues? > > > > sysdep.h generally depends on definitions in config.h. > > It seems to be a downstream-specific patch: > > [ 1518s] In file included from ../sysdeps/unix/x86_64/sysdep.h:19, > [ 1518s] from ../sysdeps/unix/sysv/linux/x86_64/sysdep.h:23, > [ 1518s] from ../sysdeps/x86_64/tst-gnu2-tls2mod1.S:19: > [ 1518s] ../sysdeps/x86_64/sysdep.h:56:5: error: "ENABLE_USERSPACE_LIVEPATCH" is not defined, evaluates to 0 [-Werror=undef] > [ 1518s] 56 | #if ENABLE_USERSPACE_LIVEPATCH > [ 1518s] | ^~~~~~~~~~~~~~~~~~~~~~~~~~ > [ 1518s] cc1: some warnings being treated as errors > > I think the module using <sysdep.h> needs to be added to the > test-internal-extras Makefile variable. I will fix it if I can reproduce it. If a glibc patch is needed, I will need it.
You can see that patch here: https://build.opensuse.org/projects/home:Andreas_Schwab:glibc/packages/glibc/files/ulp-prologue-into-asm-functions.patch
This works for me: diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index 66b21954f3..39048b12e0 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -217,6 +217,8 @@ valgrind-suppressions-tst-valgrind-smoke = \ --suppressions=$(..)sysdeps/x86_64/tst-valgrind-smoke.supp endif +test-internal-extras += tst-gnu2-tls2mod1 + endif # $(subdir) == elf ifeq ($(subdir),csu)
On Tue, Mar 19, 2024 at 5:51 AM Andreas Schwab <schwab@suse.de> wrote: > > This works for me: > > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile > index 66b21954f3..39048b12e0 100644 > --- a/sysdeps/x86_64/Makefile > +++ b/sysdeps/x86_64/Makefile > @@ -217,6 +217,8 @@ valgrind-suppressions-tst-valgrind-smoke = \ > --suppressions=$(..)sysdeps/x86_64/tst-valgrind-smoke.supp > endif > > +test-internal-extras += tst-gnu2-tls2mod1 > + > endif # $(subdir) == elf > > ifeq ($(subdir),csu) > -- > 2.44.0 > LGTM. Thanks.
On Tue, Mar 19, 2024 at 5:53 AM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Tue, Mar 19, 2024 at 5:51 AM Andreas Schwab <schwab@suse.de> wrote: > > > > This works for me: > > > > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile > > index 66b21954f3..39048b12e0 100644 > > --- a/sysdeps/x86_64/Makefile > > +++ b/sysdeps/x86_64/Makefile > > @@ -217,6 +217,8 @@ valgrind-suppressions-tst-valgrind-smoke = \ > > --suppressions=$(..)sysdeps/x86_64/tst-valgrind-smoke.supp > > endif > > > > +test-internal-extras += tst-gnu2-tls2mod1 > > + > > endif # $(subdir) == elf > > > > ifeq ($(subdir),csu) > > -- > > 2.44.0 > > > > LGTM. > Please mention BZ #31501 in your commit log. It should be included in all BZ #31501 backports. Thanks.
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 4ea373dffa..3d7c2819d7 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -311,7 +311,7 @@ update_active (struct cpu_features *cpu_features) /* NB: On AMX capable processors, ebx always includes AMX states. */ unsigned int xsave_state_full_size - = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); + = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64); cpu_features->xsave_state_size = xsave_state_full_size; @@ -401,8 +401,10 @@ update_active (struct cpu_features *cpu_features) unsigned int amx_size = (xstate_amx_comp_offsets[31] + xstate_amx_comp_sizes[31]); - amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET, - 64); + amx_size + = ALIGN_UP ((amx_size + + TLSDESC_CALL_REGISTER_SAVE_AREA), + 64); /* Set xsave_state_full_size to the compact AMX state size for XSAVEC. NB: xsave_state_full_size is only used in _dl_tlsdesc_dynamic_xsave and @@ -410,7 +412,8 @@ update_active (struct cpu_features *cpu_features) cpu_features->xsave_state_full_size = amx_size; #endif cpu_features->xsave_state_size - = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, + 64); CPU_FEATURE_SET (cpu_features, XSAVEC); } } diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h index db8e576e91..7359149e17 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -38,14 +38,59 @@ #ifdef __x86_64__ /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be - aligned to 16 bytes for fxsave and 64 bytes for xsave. - - NB: Is is non-zero because of the 128-byte red-zone. Some registers - are saved on stack without adjusting stack pointer first. When we - update stack pointer to allocate more space, we need to take the - red-zone into account. */ + aligned to 16 bytes for fxsave and 64 bytes for xsave. It is non-zero + because MOV, instead of PUSH, is used to save registers onto stack. + + +==================+<- stack frame start aligned at 8 or 16 bytes + | |<- paddings for stack realignment of 64 bytes + |------------------|<- xsave buffer end aligned at 64 bytes + | |<- + | |<- + | |<- + |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp) + | |<- 8-byte padding for 64-byte alignment + | |<- R9 + | |<- R8 + | |<- RDI + | |<- RSI + | |<- RDX + | |<- RCX + | |<- RAX + +==================+<- RSP aligned at 64 bytes + + */ # define STATE_SAVE_OFFSET (8 * 7 + 8) +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning + stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and + R11. Allocate space for RDI, RSI and RBX to avoid clobbering saved + RDI, RSI and RBX values on stack by xsave. + + +==================+<- stack frame start aligned at 8 or 16 bytes + | |<- RDI saved in the red zone + | |<- RSI saved in the red zone + | |<- RBX saved in the red zone + | |<- paddings for stack realignment of 64 bytes + |------------------|<- xsave buffer end aligned at 64 bytes + | |<- + | |<- + | |<- + |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp) + | |<- 8-byte padding for 64-byte alignment + | |<- 8-byte padding for 64-byte alignment + | |<- R11 + | |<- R10 + | |<- R9 + | |<- R8 + | |<- RDX + | |<- RCX + +==================+<- RSP aligned at 64 bytes + + Define the total register save area size for all integer registers by + adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto + stack without adjusting stack pointer first, using the red-zone. */ +# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24) + /* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX registers are mutually exclusive. */ # define STATE_SAVE_MASK \ @@ -66,8 +111,9 @@ (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK) #else /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386 - doesn't have red-zone, use 0 here. */ + uses PUSH to save registers onto stack, use 0 here. */ # define STATE_SAVE_OFFSET 0 +# define TLSDESC_CALL_REGISTER_SAVE_AREA 0 /* Save SSE, AVX, AXV512, mask and bound registers. */ # define STATE_SAVE_MASK \ diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S new file mode 100644 index 0000000000..1d636669ba --- /dev/null +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S @@ -0,0 +1,87 @@ +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec + to clobber %rdi, %rsi and %rbx. On Intel AVX CPUs, the state size + is 960 bytes and this test didn't fail. It may be due to the unused + last 128 bytes. On AMD AVX CPUs, the state size is 832 bytes and + this test might fail without the fix. */ +#ifndef OFFSET +# define OFFSET 40 +#endif + + .text + .p2align 4 + .globl apply_tls + .type apply_tls, @function +apply_tls: + cfi_startproc + _CET_ENDBR + pushq %rbp + cfi_def_cfa_offset (16) + cfi_offset (6, -16) + movdqu (%RDI_LP), %xmm0 + lea tls_var1@TLSDESC(%rip), %RAX_LP + mov %RSP_LP, %RBP_LP + cfi_def_cfa_register (6) + /* Align stack to 64 bytes. */ + and $-64, %RSP_LP + sub $OFFSET, %RSP_LP + pushq %rbx + /* Set %ebx to 0xbadbeef. */ + movl $0xbadbeef, %ebx + movl $0xbadbeef, %esi + movq %rdi, saved_rdi(%rip) + movq %rsi, saved_rsi(%rip) + call *tls_var1@TLSCALL(%RAX_LP) + /* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx. */ + cmpq saved_rdi(%rip), %rdi + jne L(hlt) + cmpq saved_rsi(%rip), %rsi + jne L(hlt) + cmpl $0xbadbeef, %ebx + jne L(hlt) + add %fs:0, %RAX_LP + movups %xmm0, 32(%RAX_LP) + movdqu 16(%RDI_LP), %xmm1 + mov %RAX_LP, %RBX_LP + movups %xmm1, 48(%RAX_LP) + lea 32(%RBX_LP), %RAX_LP + pop %rbx + leave + cfi_def_cfa (7, 8) + ret +L(hlt): + hlt + cfi_endproc + .size apply_tls, .-apply_tls + .hidden tls_var1 + .globl tls_var1 + .section .tbss,"awT",@nobits + .align 16 + .type tls_var1, @object + .size tls_var1, 3200 +tls_var1: + .zero 3200 + .local saved_rdi + .comm saved_rdi,8,8 + .local saved_rsi + .comm saved_rsi,8,8 + .section .note.GNU-stack,"",@progbits