Message ID | 20210713073845.504356-4-siddhesh@sourceware.org |
---|---|
State | New |
Headers | show |
Series | malloc hooks removal | expand |
FYI, there's at least one use case[1] that is adversely affected by __morecore removal. I have closed it as WONTFIX citing that malloc does not always do the right thing with arbitrary morecore (and we don't even test it) anyway and it's a net win to remove them, but I mention it here too in the interest of a wider discussion. Guillaume, would you like to elaborate on the use case a bit more so that we know exactly what we're dealing with? Siddhesh [1] https://sourceware.org/bugzilla/show_bug.cgi?id=20646 On 7/13/21 1:08 PM, Siddhesh Poyarekar via Libc-alpha wrote: > Make the __morecore and __default_morecore symbols compat-only and > remove their declarations from the API. Also, include morecore.c > directly into malloc.c; this should ideally get merged into malloc in > a future cleanup. > --- > NEWS | 5 +++++ > include/stdlib.h | 3 --- > malloc/Makefile | 2 +- > malloc/arena.c | 12 ++---------- > malloc/hooks.c | 2 ++ > malloc/malloc.c | 7 +++---- > malloc/malloc.h | 8 -------- > malloc/morecore.c | 34 +++++++++++----------------------- > 8 files changed, 24 insertions(+), 49 deletions(-) > > diff --git a/NEWS b/NEWS > index 13ffe627da..18d9e65eb2 100644 > --- a/NEWS > +++ b/NEWS > @@ -113,6 +113,11 @@ Deprecated and removed features, and other changes affecting compatibility: > mtrace. Similar functionality can be achieved by using conditional > breakpoints within mtrace functions from within gdb. > > +* The __morecore and __after_morecore_hook malloc hooks and the default > + implementation __default_morecore have been removed from the API. Existing > + applications will continue to link against these symbols but the interfaces > + no longer have any effect on malloc. > + > Changes to build and runtime requirements: > > * On Linux, the shm_open, sem_open, and related functions now expect the > diff --git a/include/stdlib.h b/include/stdlib.h > index 1f6e1508e4..1c6f70b082 100644 > --- a/include/stdlib.h > +++ b/include/stdlib.h > @@ -306,9 +306,6 @@ libc_hidden_proto (__qfcvt_r) > # define MB_CUR_MAX (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MB_CUR_MAX)) > # endif > > -extern void *__default_morecore (ptrdiff_t) __THROW; > -libc_hidden_proto (__default_morecore) > - > struct abort_msg_s > { > unsigned int size; > diff --git a/malloc/Makefile b/malloc/Makefile > index d15729569b..020f781b59 100644 > --- a/malloc/Makefile > +++ b/malloc/Makefile > @@ -104,7 +104,7 @@ tests-exclude-mcheck = tst-mallocstate \ > tests-mcheck = $(filter-out $(tests-exclude-mcheck), $(tests)) > endif > > -routines = malloc morecore mcheck mtrace obstack reallocarray \ > +routines = malloc mcheck mtrace obstack reallocarray \ > scratch_buffer_dupfree \ > scratch_buffer_grow scratch_buffer_grow_preserve \ > scratch_buffer_set_array_size \ > diff --git a/malloc/arena.c b/malloc/arena.c > index 991fc21a7e..f1693ed48f 100644 > --- a/malloc/arena.c > +++ b/malloc/arena.c > @@ -274,14 +274,6 @@ next_env_entry (char ***position) > #endif > > > -#if defined(SHARED) || defined(USE_MTAG) > -static void * > -__failing_morecore (ptrdiff_t d) > -{ > - return (void *) MORECORE_FAILURE; > -} > -#endif > - > #ifdef SHARED > extern struct dl_open_hook *_dl_open_hook; > libc_hidden_proto (_dl_open_hook); > @@ -310,7 +302,7 @@ ptmalloc_init (void) > and that morecore does not support tagged regions, then > disable it. */ > if (__MTAG_SBRK_UNTAGGED) > - __morecore = __failing_morecore; > + __always_fail_morecore = true; > > mtag_enabled = true; > mtag_mmap_flags = __MTAG_MMAP_FLAGS; > @@ -323,7 +315,7 @@ ptmalloc_init (void) > generic sbrk implementation also enforces this, but it is not > used on Hurd. */ > if (!__libc_initial) > - __morecore = __failing_morecore; > + __always_fail_morecore = true; > #endif > > thread_arena = &main_arena; > diff --git a/malloc/hooks.c b/malloc/hooks.c > index 45c91d6502..4aa6dadcff 100644 > --- a/malloc/hooks.c > +++ b/malloc/hooks.c > @@ -20,6 +20,8 @@ > #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_34) > void weak_variable (*__after_morecore_hook) (void) = NULL; > compat_symbol (libc, __after_morecore_hook, __after_morecore_hook, GLIBC_2_0); > +void *(*__morecore)(ptrdiff_t); > +compat_symbol (libc, __morecore, __morecore, GLIBC_2_0); > #endif > > /* Hooks for debugging versions. The initial hooks just call the > diff --git a/malloc/malloc.c b/malloc/malloc.c > index 24e7854a0e..6e8fa9e424 100644 > --- a/malloc/malloc.c > +++ b/malloc/malloc.c > @@ -384,12 +384,11 @@ __malloc_assert (const char *assertion, const char *file, unsigned int line, > #define TRIM_FASTBINS 0 > #endif > > - > /* Definition for getting more memory from the OS. */ > -#define MORECORE (*__morecore) > +#include "morecore.c" > + > +#define MORECORE (*__glibc_morecore) > #define MORECORE_FAILURE 0 > -void * __default_morecore (ptrdiff_t); > -void *(*__morecore)(ptrdiff_t) = __default_morecore; > > /* Memory tagging. */ > > diff --git a/malloc/malloc.h b/malloc/malloc.h > index 634b7db868..17ab9ee345 100644 > --- a/malloc/malloc.h > +++ b/malloc/malloc.h > @@ -76,14 +76,6 @@ extern void *valloc (size_t __size) __THROW __attribute_malloc__ > extern void *pvalloc (size_t __size) __THROW __attribute_malloc__ > __wur __attr_dealloc_free; > > -/* Underlying allocation function; successive calls should return > - contiguous pieces of memory. */ > -extern void *(*__morecore) (ptrdiff_t __size) __MALLOC_DEPRECATED; > - > -/* Default value of `__morecore'. */ > -extern void *__default_morecore (ptrdiff_t __size) > -__THROW __attribute_malloc__ __MALLOC_DEPRECATED; > - > /* SVID2/XPG mallinfo structure */ > > struct mallinfo > diff --git a/malloc/morecore.c b/malloc/morecore.c > index 047228779b..8168ef158c 100644 > --- a/malloc/morecore.c > +++ b/malloc/morecore.c > @@ -15,39 +15,27 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#ifndef _MALLOC_INTERNAL > -# define _MALLOC_INTERNAL > -# include <malloc.h> > -#endif > - > -#ifndef __GNU_LIBRARY__ > -# define __sbrk sbrk > -#endif > - > -#ifdef __GNU_LIBRARY__ > -/* It is best not to declare this and cast its result on foreign operating > - systems with potentially hostile include files. */ > - > -# include <stddef.h> > -# include <stdlib.h> > -extern void *__sbrk (ptrdiff_t increment) __THROW; > -libc_hidden_proto (__sbrk) > -#endif > - > -#ifndef NULL > -# define NULL 0 > +#if defined(SHARED) || defined(USE_MTAG) > +static bool __always_fail_morecore = false; > #endif > > /* Allocate INCREMENT more bytes of data space, > and return the start of data space, or NULL on errors. > If INCREMENT is negative, shrink data space. */ > void * > -__default_morecore (ptrdiff_t increment) > +__glibc_morecore (ptrdiff_t increment) > { > +#if defined(SHARED) || defined(USE_MTAG) > + if (__always_fail_morecore) > + return NULL; > +#endif > + > void *result = (void *) __sbrk (increment); > if (result == (void *) -1) > return NULL; > > return result; > } > -libc_hidden_def (__default_morecore) > +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_34) > +compat_symbol (libc, __glibc_morecore, __default_morecore, GLIBC_2_0); > +#endif >
On 14 Jul 12:31, Siddhesh Poyarekar wrote: > FYI, there's at least one use case[1] that is adversely affected by > __morecore removal. I have closed it as WONTFIX citing that malloc does not > always do the right thing with arbitrary morecore (and we don't even test > it) anyway and it's a net win to remove them, but I mention it here too in > the interest of a wider discussion. > > Guillaume, would you like to elaborate on the use case a bit more so that we > know exactly what we're dealing with? > > Siddhesh > > [1] https://sourceware.org/bugzilla/show_bug.cgi?id=20646 Hello Siddesh, I replied on the bug report becuase I had not seen this message. But basically, this breaks https://github.com/libhugetlbfs/libhugetlbfs/ which is old (started in 2006) and commonly used library. The library is plugging its own morecore() implementation to use hugetlb pages to back the malloc heap. This is definitely not a win for all libhugetlbfs users. We have no equivalent solution. You're asking to find us an entire new malloc implementation if there is one (or write one?). We have no way of keep using glibc's malloc and hugetlb. I am not even sure there exists an equivalent replacement. I understand there is some security concern about malloc hooks but then why not allow morecore() substitution with a properly documented/supported interface? Most users already LD_PRELOAD libhugetlbfs so that would be an easy fix. You're claiming that it's subtly broken. I'd like to understand how and why this can't be fixed. Afaik people have been using libhugetlbfs's morecore() in production for 15+ years without any issue. The only issue I am aware of is the one I reported about 5 years ago (along with a reproducer and a patch). This problem can only be reached if trimming is enabled in the morecore implementation (it's disabled in libhugetlbfs for this very reason).
On 7/14/21 6:24 PM, Guillaume Morin wrote: > On 14 Jul 12:31, Siddhesh Poyarekar wrote: > I replied on the bug report becuase I had not seen this message. > > But basically, this breaks https://github.com/libhugetlbfs/libhugetlbfs/ > which is old (started in 2006) and commonly used library. The library > is plugging its own morecore() implementation to use hugetlb pages to > back the malloc heap. Thanks, AFAICT, libhugetlbfs only uses __morecore and not the rest of the interfaces. The thing is, __morecore and friends have been deprecated for a year and building anything with __morecore ought to give the deprecation warning. > This is definitely not a win for all libhugetlbfs users. We have no > equivalent solution. You're asking to find us an entire new malloc > implementation if there is one (or write one?). We have no way of keep > using glibc's malloc and hugetlb. I am not even sure there exists > an equivalent replacement. > > I understand there is some security concern about malloc hooks but then > why not allow morecore() substitution with a properly > documented/supported interface? Most users already LD_PRELOAD > libhugetlbfs so that would be an easy fix. I don't think we would like to export a different interface to do the same thing because it still doesn't allow us to clamp down further on the system malloc implementation. Besides, the morecore hooking interface is severely limited because it only affects the main arena. Non-main arenas have their own allocation techniques and it doesn't really make sense to have an interface to control just the main arena. > You're claiming that it's subtly broken. I'd like to understand how and > why this can't be fixed. Afaik people have been using libhugetlbfs's > morecore() in production for 15+ years without any issue. > > The only issue I am aware of is the one I reported about 5 years ago > (along with a reproducer and a patch). This problem can only be reached > if trimming is enabled in the morecore implementation (it's disabled in > libhugetlbfs for this very reason). There is only the one known issue that you reported but that may well be because the interface isn't well tested. There have been many changes to malloc over the years and I cannot say for certain that the idea that morecore could be anything other than brk has been consistently considered. We don't have tests to verify that either. Further, morecore limits its influence to the main arena and it is possible that future changes could break this in ways that we cannot anticipate today. Basically this is technical debt we've accumulated from the days when malloc assumed single threaded programs and was incrementally hacked to add in multi-threaded support. We'd like to clean this up. The malloc code in glibc is quite general purpose and has little dependency on libc internals. It may well be possible for you to copy the source files over into libhugetlbfs to implement the interposer within libhugetlbfs that does exactly what you need. In fact, that may even allow you to extend hugetlbfs support to non-main arenas. Siddhesh
On 14 Jul 19:43, Siddhesh Poyarekar wrote: > > But basically, this breaks https://github.com/libhugetlbfs/libhugetlbfs/ > > which is old (started in 2006) and commonly used library. The library > > is plugging its own morecore() implementation to use hugetlb pages to > > back the malloc heap. > Thanks, AFAICT, libhugetlbfs only uses __morecore and not the rest of the > interfaces. The thing is, __morecore and friends have been deprecated for a > year and building anything with __morecore ought to give the deprecation > warning. Probably. But you surely know that most people get glibc from their distributions and do not build their own glibc. My main box is running Debian stable for example. The next release (bullseye) seems like it'll be shipping 2.31 (where it's not deprecated yet, I believe). RHEL8 ships with a libhugetlbfs rpm (so users do not need to build it at all unlike Debian), which will become useless with this change! Most users will never have seen the deprecation warning before they see the actual removal. That said, I was personally aware since it was pointed out on a libhugetlbfs github issue that it was deprecated in 2.32. I think the hope was it would be replaced by something usable, and users would not be left with *no* solution (I was also quietly hoping Eric Munson, the libhugetlbfs maintainer would be reaching out). But if you remove it, I am not quite sure what you expect libhugetlbfs users to do, really. > I don't think we would like to export a different interface to do the same > thing because it still doesn't allow us to clamp down further on the system > malloc implementation. Besides, the morecore hooking interface is severely > limited because it only affects the main arena. Non-main arenas have their > own allocation techniques and it doesn't really make sense to have an > interface to control just the main arena. Well, I do not see why morecore() does not make sense as a plugin. I am not seeing any technical argument supporting that it will prevent clamping down the implementation provided you document the currently acceptable semantics for this interface. libhugetlbfs users are aware this only works in the main arena. I certainly agree this is not ideal and this is definitely something that could be fixed as well. But again, keep in mind people have been using this for 15+ years. Obviously, if you're arguing to extend the glibc hookability so it works for all arenas, I am all ears, certainly would help out and would be a day one user :-) > There is only the one known issue that you reported but that may well be > because the interface isn't well tested. There have been many changes to > malloc over the years and I cannot say for certain that the idea that > morecore could be anything other than brk has been consistently considered. > We don't have tests to verify that either. I guess you mean sbrk() above and not brk(). Again, you seem to be ignoring the 15 years of people using libhugetblfs. It's been tested extensively by users. I certainly agree it should get testing in the glibc testsuite, but it's actually trivial afaict: - morecore() in effect has very simple semantics. It takes a size and returns a pointer to the beginning of the new area. The only thing that differs here from sbrk() is the fact that a new area might not be contiguous with the previous segment. But that's supported in the malloc code, otherwise libhugetlbfs morecore() would never have worked (that's the MORECORE_CONTIGUOUS handling in the malloc code). That's really _all_ you need to test. And I would presume non contiguous heaps are implemented not just for morecore() sake (not sure though). If that's case, no actual specific testing needs to be done. - If you _only_ allow morecore interposition through a DSO (as I was suggesting), you do _not_ even need to support non contiguous heaps (at least for morecore()) since at that point it becomes guaranteed that morecore() will always point to the same implementation. Then there is _nothing_ to test at that point. Proper morecore() implementations can semantically behave exactly like sbrk! - A morecore2() hook that takes the old top of the heap as well as the size would also allow libhugetlbfs() to implement a perfectly continuous heap and so would semantically behave like sbrk(). But it does not look like it's being considered here :-) > Further, morecore limits its influence to the main arena and it is > possible that future changes could break this in ways that we cannot > anticipate today. As I said, we're aware and it not ideal :-). But if you're using single threaded software, that covers all allocations under the mmap threshold (which is tunable). So it's _very_ easy to make this hook usable for a lot of software. The limitations are not such a big deal (but again, I'd prefer if they were not there). > Basically this is technical debt we've accumulated from the days when malloc > assumed single threaded programs and was incrementally hacked to add in > multi-threaded support. We'd like to clean this up. Honestly from my PoV, some usual feature was added a while ago. The rest of the code was changed without paying attention to the feature (bug report ignored for 5 years as well). And you're using this as an argument to remove it.... I don't think this is tech debt at all, the feature is simply paying the price of being ignored. It does _seem_ to me that little effort have been made to see if it was workable at all. > The malloc code in glibc is quite general purpose and has little dependency > on libc internals. It may well be possible for you to copy the source files > over into libhugetlbfs to implement the interposer within libhugetlbfs that > does exactly what you need. In fact, that may even allow you to extend > hugetlbfs support to non-main arenas. To be clear, I am not the libhugetblfs maintainer, just a user (that investigated the trimming issue). I think everybody is aware they can fork the glibc malloc implementation. Surely you realize that it's not ideal but even feasible for most users. Guillaume.
On 7/14/21 12:42 PM, Guillaume Morin wrote: > That said, I was personally aware since it was pointed out on a > libhugetlbfs github issue that it was deprecated in 2.32. I think the > hope was it would be replaced by something usable, and users would not > be left with *no* solution (I was also quietly hoping Eric Munson, the > libhugetlbfs maintainer would be reaching out). But if you remove it, I > am not quite sure what you expect libhugetlbfs users to do, really. Please work with upstream libhugetlbfs to prioritize issue #52. Deprecation of morecore in glibc #52 (2020-08-27): https://github.com/libhugetlbfs/libhugetlbfs/issues/52
On 7/14/21 10:12 PM, Guillaume Morin wrote: > On 14 Jul 19:43, Siddhesh Poyarekar wrote: > Probably. But you surely know that most people get glibc from their > distributions and do not build their own glibc. My main box is running > Debian stable for example. The next release (bullseye) seems like it'll > be shipping 2.31 (where it's not deprecated yet, I believe). RHEL8 ships > with a libhugetlbfs rpm (so users do not need to build it at all unlike > Debian), which will become useless with this change! Most users will > never have seen the deprecation warning before they see the actual > removal. But that means it won't be useless in RHEL8 or current stable Debian/Ubuntu, so there's still time for libhugetlbfs to adapt to the change. > That said, I was personally aware since it was pointed out on a > libhugetlbfs github issue that it was deprecated in 2.32. I think the > hope was it would be replaced by something usable, and users would not > be left with *no* solution (I was also quietly hoping Eric Munson, the > libhugetlbfs maintainer would be reaching out). But if you remove it, I > am not quite sure what you expect libhugetlbfs users to do, really. As is evident, I personally haven't followed libhugetlbfs development and have been carrying forward work to remove __morecore that started at least a year ago, if not more. That said though, the deprecation warning (especially the fact that the release notes specifically mentioned malloc interpositioning as the alternative) ought to have made it clear that we weren't thinking of an alternative. > Well, I do not see why morecore() does not make sense as a plugin. I am > not seeing any technical argument supporting that it will prevent > clamping down the implementation provided you document the currently > acceptable semantics for this interface. Right now the debug DSO that this patch pulls out only supports the debugging features (malloc_check, mcheck, mtrace, old hooks) and they're not intended to be linked against, only used as compatibility for existing programs. If __morecore is to move there, we'd have to allow linking with the debug DSO, which we don't want to do because it multiplies the interfaces we need to support. We'd basically have two malloc interfaces to support then. > libhugetlbfs users are aware this only works in the main arena. I > certainly agree this is not ideal and this is definitely something that > could be fixed as well. But again, keep in mind people have been using > this for 15+ years. > > Obviously, if you're arguing to extend the glibc hookability so it works > for all arenas, I am all ears, certainly would help out and would be a > day one user :-) Unfortunately not, I'd like the system malloc to be as less hookable as possible because the hookability is a security nightmare and it prevents a lot of optimizations and hardening. > Again, you seem to be ignoring the 15 years of people using > libhugetblfs. It's been tested extensively by users. I certainly agree > it should get testing in the glibc testsuite, but it's actually trivial > afaict: > > - morecore() in effect has very simple semantics. It takes a size and > returns a pointer to the beginning of the new area. The only thing > that differs here from sbrk() is the fact that a new area might not be > contiguous with the previous segment. But that's supported in the > malloc code, otherwise libhugetlbfs morecore() would never have worked > (that's the MORECORE_CONTIGUOUS handling in the malloc code). That's > really _all_ you need to test. And I would presume non contiguous > heaps are implemented not just for morecore() sake (not sure though). > If that's case, no actual specific testing needs to be done. > > - If you _only_ allow morecore interposition through a DSO (as I was > suggesting), you do _not_ even need to support non contiguous heaps > (at least for morecore()) since at that point it becomes guaranteed > that morecore() will always point to the same implementation. Then > there is _nothing_ to test at that point. Proper morecore() > implementations can semantically behave exactly like sbrk! > > - A morecore2() hook that takes the old top of the heap as well as the > size would also allow libhugetlbfs() to implement a perfectly > continuous heap and so would semantically behave like sbrk(). But it > does not look like it's being considered here :-) The direction we want to take is to not have to support all this in the interest of simplifying the implementation and make it easier to reason, audit and debug. > Honestly from my PoV, some usual feature was added a while ago. The rest > of the code was changed without paying attention to the feature (bug > report ignored for 5 years as well). And you're using this as an > argument to remove it.... I don't think this is tech debt at all, the > feature is simply paying the price of being ignored. It does _seem_ to > me that little effort have been made to see if it was workable at all. Different developers have been involved in this and I suppose the developers who wrote the original implementation may even agree with you. However, technology has changed a lot over the years and many of the things that were thought to be useful are now either redundant, unwieldy or downright dangerous. The hooks fall somewhere between the last two because in the general case they're quite hard to reason and in the simplest case, they're unprotected indirections that have historically been used as vulnerability primitives for years. > To be clear, I am not the libhugetblfs maintainer, just a user (that > investigated the trimming issue). I think everybody is aware they can > fork the glibc malloc implementation. Surely you realize that it's not > ideal but even feasible for most users. I understand, I'm sorry it has come to this. I hope libhugetlbfs maintainers can find a way out sooner. Siddhesh
On 14/07/2021 14:15, Carlos O'Donell via Libc-alpha wrote: > On 7/14/21 12:42 PM, Guillaume Morin wrote: >> That said, I was personally aware since it was pointed out on a >> libhugetlbfs github issue that it was deprecated in 2.32. I think the >> hope was it would be replaced by something usable, and users would not >> be left with *no* solution (I was also quietly hoping Eric Munson, the >> libhugetlbfs maintainer would be reaching out). But if you remove it, I >> am not quite sure what you expect libhugetlbfs users to do, really. > > Please work with upstream libhugetlbfs to prioritize issue #52. > > Deprecation of morecore in glibc #52 (2020-08-27): > https://github.com/libhugetlbfs/libhugetlbfs/issues/52 > I agree with Carlos and Siddhesh rationale that a dangling function pointer in an essential code as malloc is a security liability and it is about time to get removed (as there is PoC out there that does it to exploit glibc malloc). However I think we can work towards a solution to enable within glibc instead of pushing the support on libhugetlfs, which does *not* aim to be a malloc replacement but rather a way to explicit provide and manage large pages. There is some discussion last year about providing large pages support directly on glibc without resorting to THP [1]. The idea is to hook up the mmap/madvise with the required flags, which seems similar of what is required by libhugetls morecore() implementation [2]. So I think it should be feasible to add support for the required bits on glibc and provide a tunable to actually use it. The proposed patchset does require some additional work, such as providing is through a tunable, allowing different sizes depending of the architecture, and maybe just dump the sbrk change in favor of just using mmap() for the case of large pages. Guillaume, would this be suffice for your use cases. The libhugetlsfs projects does provide more functionality than glibc scope, for instance a linker script to for bss/data and exe section to large parges, so I glibc support would be a complement to what libhugetls provides. [1] https://sourceware.org/pipermail/libc-alpha/2020-May/113539.html [2] https://github.com/libhugetlbfs/libhugetlbfs/blob/master/morecore.c
On 14 Jul 23:02, Siddhesh Poyarekar wrote: > But that means it won't be useless in RHEL8 or current stable Debian/Ubuntu, > so there's still time for libhugetlbfs to adapt to the change. As I said though, there is no other way besides forking glibc's malloc. the morecore.c implementation in libhugetlbfs is like 100 lines of code. We're not talking apples to apples. Far from it. > As is evident, I personally haven't followed libhugetlbfs development and > have been carrying forward work to remove __morecore that started at least a > year ago, if not more. That said though, the deprecation warning > (especially the fact that the release notes specifically mentioned malloc > interpositioning as the alternative) ought to have made it clear that we > weren't thinking of an alternative. Well, I will certainly grant that someone should have reached out sooner, though I would probably have done it if my bug report had not been ignore for 5 years ;-) > > Well, I do not see why morecore() does not make sense as a plugin. I am > > not seeing any technical argument supporting that it will prevent > > clamping down the implementation provided you document the currently > > acceptable semantics for this interface. > > Right now the debug DSO that this patch pulls out only supports the > debugging features (malloc_check, mcheck, mtrace, old hooks) and they're not > intended to be linked against, only used as compatibility for existing > programs. If __morecore is to move there, we'd have to allow linking with > the debug DSO, which we don't want to do because it multiplies the > interfaces we need to support. We'd basically have two malloc interfaces to > support then. I did not mean to say it should be moved to the debug DSO. What I am saying is having explicit glibc support so I can LD_PRELOAD a version of libhugetlbfs that would export a __morecore() that will then be used by glibc. No function pointers that needs explicitly set. I am suggesting morecore interposition instead of the malloc interposition you initially suggested. That should make no security difference at all. It just requires to agree on some semantics though for the interface which are basically these are sbrk. > The direction we want to take is to not have to support all this in the > interest of simplifying the implementation and make it easier to reason, > audit and debug. I think I have made decent technical points that it's not complicated at all. There are no complications or security implications that I see, just agreeing on the semantics on one function ... I keep hearing theoretical concerns but nobody seems to want to detail: why morecore interposition is complicated or insecure. I'll grant you guys are the experts here, so please humor me, I am all ears. > The hooks fall somewhere between the last two because in the > general case they're quite hard to reason and in the simplest case, they're > unprotected indirections that have historically been used as vulnerability > primitives for years. I have not argued for you to keep the hooks as is because i understand your concerns. I am trying to find a solution that alleviates your understandable concerns without breaking every single libhugetlbfs user. Guillaume.
On 14 Jul 13:15, Carlos O'Donell wrote: > > On 7/14/21 12:42 PM, Guillaume Morin wrote: > > That said, I was personally aware since it was pointed out on a > > libhugetlbfs github issue that it was deprecated in 2.32. I think the > > hope was it would be replaced by something usable, and users would not > > be left with *no* solution (I was also quietly hoping Eric Munson, the > > libhugetlbfs maintainer would be reaching out). But if you remove it, I > > am not quite sure what you expect libhugetlbfs users to do, really. > > Please work with upstream libhugetlbfs to prioritize issue #52. > > Deprecation of morecore in glibc #52 (2020-08-27): > https://github.com/libhugetlbfs/libhugetlbfs/issues/52 I wrote quite a few extensive emails in this thread about how there are no good, maintainbable solution for libhugetlbfs that I can see. If there was, I would have written a patch already instead of arguing over email :-) I do get your frustation here and libhugetlbfs users/debs should have reached out earlier. But I cannot do much about that now. I am trying to find a workable compromise here, that's all.
On 14 Jul 14:42, Adhemerval Zanella wrote: > I agree with Carlos and Siddhesh rationale that a dangling function > pointer in an essential code as malloc is a security liability and it > is about time to get removed (as there is PoC out there that does it to > exploit glibc malloc). To reiterate, I have not argued to keep the hook as is. I do understand the valid concerns about potential exploitation with the current scheme. > However I think we can work towards a solution to enable within glibc > instead of pushing the support on libhugetlfs, which does *not* aim to > be a malloc replacement but rather a way to explicit provide and > manage large pages. > There is some discussion last year about providing large pages support > directly on glibc without resorting to THP [1]. The idea is to hook > up the mmap/madvise with the required flags, which seems similar of > what is required by libhugetls morecore() implementation [2]. So I think > it should be feasible to add support for the required bits on glibc > and provide a tunable to actually use it. That would be ideal. > Guillaume, would this be suffice for your use cases. The libhugetlsfs > projects does provide more functionality than glibc scope, for instance > a linker script to for bss/data and exe section to large parges, so I > glibc support would be a complement to what libhugetls provides. Yes, it would be perfect for my use case. Guillaume.
On 7/14/21 11:55 PM, Guillaume Morin wrote: > I did not mean to say it should be moved to the debug DSO. > > What I am saying is having explicit glibc support so I can LD_PRELOAD a > version of libhugetlbfs that would export a __morecore() that will then be > used by glibc. No function pointers that needs explicitly set. > > I am suggesting morecore interposition instead of the malloc > interposition you initially suggested. That should make no security > difference at all. It just requires to agree on some semantics though > for the interface which are basically these are sbrk. This seems to be the central the point of difference. A contract for an interface like that is not limited to hugetlbfs. That said, Adhemerval's suggestion is perhaps a good compromise. I'm far more comfortable with a tunable chosen hugetlbfs morecore implementation within glibc since it does not have any arbitrary interface contracts that we may be obliged to maintain indefinitely. Siddhesh
On 7/14/21 11:12 PM, Adhemerval Zanella wrote: > There is some discussion last year about providing large pages support > directly on glibc without resorting to THP [1]. The idea is to hook > up the mmap/madvise with the required flags, which seems similar of > what is required by libhugetls morecore() implementation [2]. So I think > it should be feasible to add support for the required bits on glibc > and provide a tunable to actually use it. Sure, having hugetlb support within glibc is far more maintainable IMO than exposing a generic interface. > The proposed patchset does require some additional work, such as providing > is through a tunable, allowing different sizes depending of the architecture, > and maybe just dump the sbrk change in favor of just using mmap() for the > case of large pages. I think we should keep it distinct from this patchset. Since adding hugetlb support isn't likely to have an ABI impact (only tunables, which is not ABI), it could be done after 2.34. What do you think? Siddhesh
On 15 Jul 0:13, Siddhesh Poyarekar wrote: > This seems to be the central the point of difference. A contract for an > interface like that is not limited to hugetlbfs. I do agree that it does seem to the central point of contention. I am still a little confused about why that is though. The contract is: pass a size_t, return the previous break. No other inferface necessary, mo special case or anything necessary in the malloc code. You only need to provide the default implementation that is just a straightforward call to sbrk(). > That said, Adhemerval's suggestion is perhaps a good compromise. I'm far > more comfortable with a tunable chosen hugetlbfs morecore implementation > within glibc since it does not have any arbitrary interface contracts that > we may be obliged to maintain indefinitely. If that's workable, I do prefer this solution as well. A tunable is stricly better.
On 7/13/21 3:38 AM, Siddhesh Poyarekar wrote: > Make the __morecore and __default_morecore symbols compat-only and > remove their declarations from the API. Also, include morecore.c > directly into malloc.c; this should ideally get merged into malloc in > a future cleanup. OK for 2.34. Tested without regression on x86_64 and i686. Reviewed-by: Carlos O'Donell <carlos@redhat.com> Tested-by: Carlos O'Donell <carlos@redhat.com> > --- > NEWS | 5 +++++ > include/stdlib.h | 3 --- > malloc/Makefile | 2 +- > malloc/arena.c | 12 ++---------- > malloc/hooks.c | 2 ++ > malloc/malloc.c | 7 +++---- > malloc/malloc.h | 8 -------- > malloc/morecore.c | 34 +++++++++++----------------------- > 8 files changed, 24 insertions(+), 49 deletions(-) > > diff --git a/NEWS b/NEWS > index 13ffe627da..18d9e65eb2 100644 > --- a/NEWS > +++ b/NEWS > @@ -113,6 +113,11 @@ Deprecated and removed features, and other changes affecting compatibility: > mtrace. Similar functionality can be achieved by using conditional > breakpoints within mtrace functions from within gdb. > > +* The __morecore and __after_morecore_hook malloc hooks and the default > + implementation __default_morecore have been removed from the API. Existing > + applications will continue to link against these symbols but the interfaces > + no longer have any effect on malloc. OK. > + > Changes to build and runtime requirements: > > * On Linux, the shm_open, sem_open, and related functions now expect the > diff --git a/include/stdlib.h b/include/stdlib.h > index 1f6e1508e4..1c6f70b082 100644 > --- a/include/stdlib.h > +++ b/include/stdlib.h > @@ -306,9 +306,6 @@ libc_hidden_proto (__qfcvt_r) > # define MB_CUR_MAX (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MB_CUR_MAX)) > # endif > > -extern void *__default_morecore (ptrdiff_t) __THROW; > -libc_hidden_proto (__default_morecore) OK. > - > struct abort_msg_s > { > unsigned int size; > diff --git a/malloc/Makefile b/malloc/Makefile > index d15729569b..020f781b59 100644 > --- a/malloc/Makefile > +++ b/malloc/Makefile > @@ -104,7 +104,7 @@ tests-exclude-mcheck = tst-mallocstate \ > tests-mcheck = $(filter-out $(tests-exclude-mcheck), $(tests)) > endif > > -routines = malloc morecore mcheck mtrace obstack reallocarray \ > +routines = malloc mcheck mtrace obstack reallocarray \ OK. > scratch_buffer_dupfree \ > scratch_buffer_grow scratch_buffer_grow_preserve \ > scratch_buffer_set_array_size \ > diff --git a/malloc/arena.c b/malloc/arena.c > index 991fc21a7e..f1693ed48f 100644 > --- a/malloc/arena.c > +++ b/malloc/arena.c > @@ -274,14 +274,6 @@ next_env_entry (char ***position) > #endif > > > -#if defined(SHARED) || defined(USE_MTAG) > -static void * > -__failing_morecore (ptrdiff_t d) > -{ > - return (void *) MORECORE_FAILURE; > -} > -#endif > - OK. > #ifdef SHARED > extern struct dl_open_hook *_dl_open_hook; > libc_hidden_proto (_dl_open_hook); > @@ -310,7 +302,7 @@ ptmalloc_init (void) > and that morecore does not support tagged regions, then > disable it. */ > if (__MTAG_SBRK_UNTAGGED) > - __morecore = __failing_morecore; > + __always_fail_morecore = true; OK. > > mtag_enabled = true; > mtag_mmap_flags = __MTAG_MMAP_FLAGS; > @@ -323,7 +315,7 @@ ptmalloc_init (void) > generic sbrk implementation also enforces this, but it is not > used on Hurd. */ > if (!__libc_initial) > - __morecore = __failing_morecore; > + __always_fail_morecore = true; OK. > #endif > > thread_arena = &main_arena; > diff --git a/malloc/hooks.c b/malloc/hooks.c > index 45c91d6502..4aa6dadcff 100644 > --- a/malloc/hooks.c > +++ b/malloc/hooks.c > @@ -20,6 +20,8 @@ > #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_34) > void weak_variable (*__after_morecore_hook) (void) = NULL; > compat_symbol (libc, __after_morecore_hook, __after_morecore_hook, GLIBC_2_0); > +void *(*__morecore)(ptrdiff_t); > +compat_symbol (libc, __morecore, __morecore, GLIBC_2_0); OK. > #endif > > /* Hooks for debugging versions. The initial hooks just call the > diff --git a/malloc/malloc.c b/malloc/malloc.c > index 24e7854a0e..6e8fa9e424 100644 > --- a/malloc/malloc.c > +++ b/malloc/malloc.c > @@ -384,12 +384,11 @@ __malloc_assert (const char *assertion, const char *file, unsigned int line, > #define TRIM_FASTBINS 0 > #endif > > - > /* Definition for getting more memory from the OS. */ > -#define MORECORE (*__morecore) > +#include "morecore.c" > + > +#define MORECORE (*__glibc_morecore) OK. > #define MORECORE_FAILURE 0 > -void * __default_morecore (ptrdiff_t); > -void *(*__morecore)(ptrdiff_t) = __default_morecore; OK. > > /* Memory tagging. */ > > diff --git a/malloc/malloc.h b/malloc/malloc.h > index 634b7db868..17ab9ee345 100644 > --- a/malloc/malloc.h > +++ b/malloc/malloc.h > @@ -76,14 +76,6 @@ extern void *valloc (size_t __size) __THROW __attribute_malloc__ > extern void *pvalloc (size_t __size) __THROW __attribute_malloc__ > __wur __attr_dealloc_free; > > -/* Underlying allocation function; successive calls should return > - contiguous pieces of memory. */ > -extern void *(*__morecore) (ptrdiff_t __size) __MALLOC_DEPRECATED; > - > -/* Default value of `__morecore'. */ > -extern void *__default_morecore (ptrdiff_t __size) > -__THROW __attribute_malloc__ __MALLOC_DEPRECATED; OK. > - > /* SVID2/XPG mallinfo structure */ > > struct mallinfo > diff --git a/malloc/morecore.c b/malloc/morecore.c > index 047228779b..8168ef158c 100644 > --- a/malloc/morecore.c > +++ b/malloc/morecore.c > @@ -15,39 +15,27 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#ifndef _MALLOC_INTERNAL > -# define _MALLOC_INTERNAL > -# include <malloc.h> > -#endif > - > -#ifndef __GNU_LIBRARY__ > -# define __sbrk sbrk > -#endif > - > -#ifdef __GNU_LIBRARY__ > -/* It is best not to declare this and cast its result on foreign operating > - systems with potentially hostile include files. */ > - > -# include <stddef.h> > -# include <stdlib.h> > -extern void *__sbrk (ptrdiff_t increment) __THROW; > -libc_hidden_proto (__sbrk) > -#endif > - > -#ifndef NULL > -# define NULL 0 OK. > +#if defined(SHARED) || defined(USE_MTAG) > +static bool __always_fail_morecore = false; > #endif > > /* Allocate INCREMENT more bytes of data space, > and return the start of data space, or NULL on errors. > If INCREMENT is negative, shrink data space. */ > void * > -__default_morecore (ptrdiff_t increment) > +__glibc_morecore (ptrdiff_t increment) > { > +#if defined(SHARED) || defined(USE_MTAG) > + if (__always_fail_morecore) > + return NULL; > +#endif OK. > + > void *result = (void *) __sbrk (increment); > if (result == (void *) -1) > return NULL; > > return result; > } > -libc_hidden_def (__default_morecore) > +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_34) > +compat_symbol (libc, __glibc_morecore, __default_morecore, GLIBC_2_0); OK. Use a new name and create a versioned compat symbol for __default_morecore in case someone calls it. > +#endif >
diff --git a/NEWS b/NEWS index 13ffe627da..18d9e65eb2 100644 --- a/NEWS +++ b/NEWS @@ -113,6 +113,11 @@ Deprecated and removed features, and other changes affecting compatibility: mtrace. Similar functionality can be achieved by using conditional breakpoints within mtrace functions from within gdb. +* The __morecore and __after_morecore_hook malloc hooks and the default + implementation __default_morecore have been removed from the API. Existing + applications will continue to link against these symbols but the interfaces + no longer have any effect on malloc. + Changes to build and runtime requirements: * On Linux, the shm_open, sem_open, and related functions now expect the diff --git a/include/stdlib.h b/include/stdlib.h index 1f6e1508e4..1c6f70b082 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -306,9 +306,6 @@ libc_hidden_proto (__qfcvt_r) # define MB_CUR_MAX (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MB_CUR_MAX)) # endif -extern void *__default_morecore (ptrdiff_t) __THROW; -libc_hidden_proto (__default_morecore) - struct abort_msg_s { unsigned int size; diff --git a/malloc/Makefile b/malloc/Makefile index d15729569b..020f781b59 100644 --- a/malloc/Makefile +++ b/malloc/Makefile @@ -104,7 +104,7 @@ tests-exclude-mcheck = tst-mallocstate \ tests-mcheck = $(filter-out $(tests-exclude-mcheck), $(tests)) endif -routines = malloc morecore mcheck mtrace obstack reallocarray \ +routines = malloc mcheck mtrace obstack reallocarray \ scratch_buffer_dupfree \ scratch_buffer_grow scratch_buffer_grow_preserve \ scratch_buffer_set_array_size \ diff --git a/malloc/arena.c b/malloc/arena.c index 991fc21a7e..f1693ed48f 100644 --- a/malloc/arena.c +++ b/malloc/arena.c @@ -274,14 +274,6 @@ next_env_entry (char ***position) #endif -#if defined(SHARED) || defined(USE_MTAG) -static void * -__failing_morecore (ptrdiff_t d) -{ - return (void *) MORECORE_FAILURE; -} -#endif - #ifdef SHARED extern struct dl_open_hook *_dl_open_hook; libc_hidden_proto (_dl_open_hook); @@ -310,7 +302,7 @@ ptmalloc_init (void) and that morecore does not support tagged regions, then disable it. */ if (__MTAG_SBRK_UNTAGGED) - __morecore = __failing_morecore; + __always_fail_morecore = true; mtag_enabled = true; mtag_mmap_flags = __MTAG_MMAP_FLAGS; @@ -323,7 +315,7 @@ ptmalloc_init (void) generic sbrk implementation also enforces this, but it is not used on Hurd. */ if (!__libc_initial) - __morecore = __failing_morecore; + __always_fail_morecore = true; #endif thread_arena = &main_arena; diff --git a/malloc/hooks.c b/malloc/hooks.c index 45c91d6502..4aa6dadcff 100644 --- a/malloc/hooks.c +++ b/malloc/hooks.c @@ -20,6 +20,8 @@ #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_34) void weak_variable (*__after_morecore_hook) (void) = NULL; compat_symbol (libc, __after_morecore_hook, __after_morecore_hook, GLIBC_2_0); +void *(*__morecore)(ptrdiff_t); +compat_symbol (libc, __morecore, __morecore, GLIBC_2_0); #endif /* Hooks for debugging versions. The initial hooks just call the diff --git a/malloc/malloc.c b/malloc/malloc.c index 24e7854a0e..6e8fa9e424 100644 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@ -384,12 +384,11 @@ __malloc_assert (const char *assertion, const char *file, unsigned int line, #define TRIM_FASTBINS 0 #endif - /* Definition for getting more memory from the OS. */ -#define MORECORE (*__morecore) +#include "morecore.c" + +#define MORECORE (*__glibc_morecore) #define MORECORE_FAILURE 0 -void * __default_morecore (ptrdiff_t); -void *(*__morecore)(ptrdiff_t) = __default_morecore; /* Memory tagging. */ diff --git a/malloc/malloc.h b/malloc/malloc.h index 634b7db868..17ab9ee345 100644 --- a/malloc/malloc.h +++ b/malloc/malloc.h @@ -76,14 +76,6 @@ extern void *valloc (size_t __size) __THROW __attribute_malloc__ extern void *pvalloc (size_t __size) __THROW __attribute_malloc__ __wur __attr_dealloc_free; -/* Underlying allocation function; successive calls should return - contiguous pieces of memory. */ -extern void *(*__morecore) (ptrdiff_t __size) __MALLOC_DEPRECATED; - -/* Default value of `__morecore'. */ -extern void *__default_morecore (ptrdiff_t __size) -__THROW __attribute_malloc__ __MALLOC_DEPRECATED; - /* SVID2/XPG mallinfo structure */ struct mallinfo diff --git a/malloc/morecore.c b/malloc/morecore.c index 047228779b..8168ef158c 100644 --- a/malloc/morecore.c +++ b/malloc/morecore.c @@ -15,39 +15,27 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifndef _MALLOC_INTERNAL -# define _MALLOC_INTERNAL -# include <malloc.h> -#endif - -#ifndef __GNU_LIBRARY__ -# define __sbrk sbrk -#endif - -#ifdef __GNU_LIBRARY__ -/* It is best not to declare this and cast its result on foreign operating - systems with potentially hostile include files. */ - -# include <stddef.h> -# include <stdlib.h> -extern void *__sbrk (ptrdiff_t increment) __THROW; -libc_hidden_proto (__sbrk) -#endif - -#ifndef NULL -# define NULL 0 +#if defined(SHARED) || defined(USE_MTAG) +static bool __always_fail_morecore = false; #endif /* Allocate INCREMENT more bytes of data space, and return the start of data space, or NULL on errors. If INCREMENT is negative, shrink data space. */ void * -__default_morecore (ptrdiff_t increment) +__glibc_morecore (ptrdiff_t increment) { +#if defined(SHARED) || defined(USE_MTAG) + if (__always_fail_morecore) + return NULL; +#endif + void *result = (void *) __sbrk (increment); if (result == (void *) -1) return NULL; return result; } -libc_hidden_def (__default_morecore) +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_34) +compat_symbol (libc, __glibc_morecore, __default_morecore, GLIBC_2_0); +#endif