Message ID | 20210924042623.3899762-1-maskray@google.com |
---|---|
State | New |
Headers | show |
Series | i386: Port elf_machine_{load_address,dynamic} from x86-64 | expand |
On Thu, Sep 23, 2021 at 9:26 PM Fangrui Song <maskray@google.com> wrote: > > This drops reliance on _GLOBAL_OFFSET_TABLE_[0] being the link-time > address of _DYNAMIC. > > The code sequence length does not change. > --- > sysdeps/i386/dl-machine.h | 29 +++++++++++------------------ > 1 file changed, 11 insertions(+), 18 deletions(-) > > diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h > index 590b41d8d7..9f0eeaf66e 100644 > --- a/sysdeps/i386/dl-machine.h > +++ b/sysdeps/i386/dl-machine.h > @@ -34,27 +34,20 @@ elf_machine_matches_host (const Elf32_Ehdr *ehdr) > } > > > -/* Return the link-time address of _DYNAMIC. Conveniently, this is the > - first element of the GOT, a special entry that is never relocated. */ > -static inline Elf32_Addr __attribute__ ((unused, const)) > -elf_machine_dynamic (void) > -{ > - /* This produces a GOTOFF reloc that resolves to zero at link time, so in > - fact just loads from the GOT register directly. By doing it without > - an asm we can let the compiler choose any register. */ > - extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden; > - return _GLOBAL_OFFSET_TABLE_[0]; > -} > - > /* Return the run-time load address of the shared object. */ > -static inline Elf32_Addr __attribute__ ((unused)) > +static inline ElfW(Addr) __attribute__ ((unused)) > elf_machine_load_address (void) > { > - /* Compute the difference between the runtime address of _DYNAMIC as seen > - by a GOTOFF reference, and the link-time address found in the special > - unrelocated first GOT entry. */ > - extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden; > - return (Elf32_Addr) &bygotoff - elf_machine_dynamic (); > + extern const ElfW(Ehdr) __ehdr_start attribute_hidden; > + return (ElfW(Addr)) &__ehdr_start; > +} > + > +/* Return the link-time address of _DYNAMIC. 
*/ > +static inline ElfW(Addr) __attribute__ ((unused)) > +elf_machine_dynamic (void) > +{ > + extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; > + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); > } > > /* Set up the loaded object described by L so its unrelocated PLT > -- > 2.33.0.685.g46640cef36-goog > what are the code differences before and after?
On 2021-09-23, H.J. Lu wrote: >On Thu, Sep 23, 2021 at 9:26 PM Fangrui Song <maskray@google.com> wrote: >> >> This drops reliance on _GLOBAL_OFFSET_TABLE_[0] being the link-time >> address of _DYNAMIC. >> >> The code sequence length does not change. >> --- >> sysdeps/i386/dl-machine.h | 29 +++++++++++------------------ >> 1 file changed, 11 insertions(+), 18 deletions(-) >> >> diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h >> index 590b41d8d7..9f0eeaf66e 100644 >> --- a/sysdeps/i386/dl-machine.h >> +++ b/sysdeps/i386/dl-machine.h >> @@ -34,27 +34,20 @@ elf_machine_matches_host (const Elf32_Ehdr *ehdr) >> } >> >> >> -/* Return the link-time address of _DYNAMIC. Conveniently, this is the >> - first element of the GOT, a special entry that is never relocated. */ >> -static inline Elf32_Addr __attribute__ ((unused, const)) >> -elf_machine_dynamic (void) >> -{ >> - /* This produces a GOTOFF reloc that resolves to zero at link time, so in >> - fact just loads from the GOT register directly. By doing it without >> - an asm we can let the compiler choose any register. */ >> - extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden; >> - return _GLOBAL_OFFSET_TABLE_[0]; >> -} >> - >> /* Return the run-time load address of the shared object. */ >> -static inline Elf32_Addr __attribute__ ((unused)) >> +static inline ElfW(Addr) __attribute__ ((unused)) >> elf_machine_load_address (void) >> { >> - /* Compute the difference between the runtime address of _DYNAMIC as seen >> - by a GOTOFF reference, and the link-time address found in the special >> - unrelocated first GOT entry. */ >> - extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden; >> - return (Elf32_Addr) &bygotoff - elf_machine_dynamic (); >> + extern const ElfW(Ehdr) __ehdr_start attribute_hidden; >> + return (ElfW(Addr)) &__ehdr_start; >> +} >> + >> +/* Return the link-time address of _DYNAMIC. 
*/ >> +static inline ElfW(Addr) __attribute__ ((unused)) >> +elf_machine_dynamic (void) >> +{ >> + extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; >> + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); >> } >> >> /* Set up the loaded object described by L so its unrelocated PLT >> -- >> 2.33.0.685.g46640cef36-goog >> > >what are the code differences before and after? long ehdr(void) { extern char __ehdr_start[] __attribute__((visibility("hidden"))); return (long)__ehdr_start; } long got(void) { extern long _GLOBAL_OFFSET_TABLE_[] __attribute__((visibility("hidden"))); return _GLOBAL_OFFSET_TABLE_[0]; } ehdr: call __x86.get_pc_thunk.ax addl $_GLOBAL_OFFSET_TABLE_, %eax leal __ehdr_start@GOTOFF(%eax), %eax ret got: call __x86.get_pc_thunk.ax addl $_GLOBAL_OFFSET_TABLE_, %eax movl _GLOBAL_OFFSET_TABLE_@GOTOFF(%eax), %eax ret In the GCC-generated elf/rtld.os, the local code sequence related to __ehdr_start/_GLOBAL_OFFSET_TABLE_ does not change in size, but globally the new code triggers some code motion and ends up making the file smaller. FWIW, the .text section of ld.so is 48 bytes smaller. The new code has no memory load (it uses leal to compute the address of __ehdr_start, where the old code used movl to read _GLOBAL_OFFSET_TABLE_[0]), which I guess may allow GCC to optimize more.
On Thu, Sep 23, 2021 at 9:26 PM Fangrui Song <maskray@google.com> wrote: > > This drops reliance on _GLOBAL_OFFSET_TABLE_[0] being the link-time > address of _DYNAMIC. > > The code sequence length does not change. > --- > sysdeps/i386/dl-machine.h | 29 +++++++++++------------------ > 1 file changed, 11 insertions(+), 18 deletions(-) > > diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h > index 590b41d8d7..9f0eeaf66e 100644 > --- a/sysdeps/i386/dl-machine.h > +++ b/sysdeps/i386/dl-machine.h > @@ -34,27 +34,20 @@ elf_machine_matches_host (const Elf32_Ehdr *ehdr) > } > > > -/* Return the link-time address of _DYNAMIC. Conveniently, this is the > - first element of the GOT, a special entry that is never relocated. */ > -static inline Elf32_Addr __attribute__ ((unused, const)) > -elf_machine_dynamic (void) > -{ > - /* This produces a GOTOFF reloc that resolves to zero at link time, so in > - fact just loads from the GOT register directly. By doing it without > - an asm we can let the compiler choose any register. */ > - extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden; > - return _GLOBAL_OFFSET_TABLE_[0]; > -} > - > /* Return the run-time load address of the shared object. */ > -static inline Elf32_Addr __attribute__ ((unused)) > +static inline ElfW(Addr) __attribute__ ((unused)) > elf_machine_load_address (void) > { > - /* Compute the difference between the runtime address of _DYNAMIC as seen > - by a GOTOFF reference, and the link-time address found in the special > - unrelocated first GOT entry. */ > - extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden; > - return (Elf32_Addr) &bygotoff - elf_machine_dynamic (); > + extern const ElfW(Ehdr) __ehdr_start attribute_hidden; > + return (ElfW(Addr)) &__ehdr_start; > +} > + > +/* Return the link-time address of _DYNAMIC. 
*/ > +static inline ElfW(Addr) __attribute__ ((unused)) > +elf_machine_dynamic (void) > +{ > + extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; > + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); > } Please use Elf32 instead of ElfW. > /* Set up the loaded object described by L so its unrelocated PLT > -- > 2.33.0.685.g46640cef36-goog >
diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h index 590b41d8d7..9f0eeaf66e 100644 --- a/sysdeps/i386/dl-machine.h +++ b/sysdeps/i386/dl-machine.h @@ -34,27 +34,20 @@ elf_machine_matches_host (const Elf32_Ehdr *ehdr) } -/* Return the link-time address of _DYNAMIC. Conveniently, this is the - first element of the GOT, a special entry that is never relocated. */ -static inline Elf32_Addr __attribute__ ((unused, const)) -elf_machine_dynamic (void) -{ - /* This produces a GOTOFF reloc that resolves to zero at link time, so in - fact just loads from the GOT register directly. By doing it without - an asm we can let the compiler choose any register. */ - extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden; - return _GLOBAL_OFFSET_TABLE_[0]; -} - /* Return the run-time load address of the shared object. */ -static inline Elf32_Addr __attribute__ ((unused)) +static inline ElfW(Addr) __attribute__ ((unused)) elf_machine_load_address (void) { - /* Compute the difference between the runtime address of _DYNAMIC as seen - by a GOTOFF reference, and the link-time address found in the special - unrelocated first GOT entry. */ - extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden; - return (Elf32_Addr) &bygotoff - elf_machine_dynamic (); + extern const ElfW(Ehdr) __ehdr_start attribute_hidden; + return (ElfW(Addr)) &__ehdr_start; +} + +/* Return the link-time address of _DYNAMIC. */ +static inline ElfW(Addr) __attribute__ ((unused)) +elf_machine_dynamic (void) +{ + extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); } /* Set up the loaded object described by L so its unrelocated PLT